From 342eb45a2fd19869273ec038144c71ac6e49db0e Mon Sep 17 00:00:00 2001
From: Stefan Behnel <scoder@users.berlios.de>
Date: Mon, 3 Jan 2011 22:36:35 +0100
Subject: [PATCH] fix ticket #602: let bytes literals start off as 'bytes'
 instead of 'char*'

---
 Cython/Compiler/ExprNodes.py        | 46 +++++++++++++++--------------
 Cython/Compiler/PyrexTypes.py       | 15 +++++++++-
 tests/errors/string_assignments.pyx |  6 ++--
 tests/run/type_inference.pyx        |  8 ++---
 4 files changed, 45 insertions(+), 30 deletions(-)

diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py
index 366145cfd..39b936302 100755
--- a/Cython/Compiler/ExprNodes.py
+++ b/Cython/Compiler/ExprNodes.py
@@ -948,7 +948,8 @@ class BytesNode(ConstNode):
     #
     # value      BytesLiteral
 
-    type = PyrexTypes.c_char_ptr_type
+    # start off as Python 'bytes' to support len() in O(1)
+    type = bytes_type
 
     def compile_time_value(self, denv):
         return self.value
@@ -969,11 +970,13 @@ class BytesNode(ConstNode):
         return len(self.value) == 1
 
     def coerce_to_boolean(self, env):
-        # This is special because we start off as a C char*.  Testing
-        # that for truth directly would yield the wrong result.
+        # Decide truth from the literal's value at compile time: once coerced
+        # to a C char*, a pointer test would be true even for an empty literal.
         return BoolNode(self.pos, value=bool(self.value))
 
     def coerce_to(self, dst_type, env):
+        if self.type == dst_type:
+            return self
         if dst_type.is_int:
             if not self.can_coerce_to_char_literal():
                 error(self.pos, "Only single-character string literals can be coerced into ints.")
@@ -984,21 +987,20 @@ class BytesNode(ConstNode):
             return CharNode(self.pos, value=self.value)
 
         node = BytesNode(self.pos, value=self.value)
-        if dst_type == PyrexTypes.c_char_ptr_type:
-            node.type = PyrexTypes.c_char_ptr_type
+        if dst_type.is_pyobject:
+            if dst_type in (py_object_type, Builtin.bytes_type):
+                node.type = Builtin.bytes_type
+            else:
+                self.check_for_coercion_error(dst_type, fail=True)
+                return node
+        elif dst_type == PyrexTypes.c_char_ptr_type:
+            node.type = dst_type
             return node
         elif dst_type == PyrexTypes.c_uchar_ptr_type:
             node.type = PyrexTypes.c_char_ptr_type
             return CastNode(node, PyrexTypes.c_uchar_ptr_type)
-
-        if not self.type.is_pyobject:
-            if dst_type in (py_object_type, Builtin.bytes_type):
-                node.type = Builtin.bytes_type
-            elif dst_type.is_pyobject:
-                self.fail_assignment(dst_type)
-                return self
-        elif dst_type.is_pyobject and dst_type is not py_object_type:
-            self.check_for_coercion_error(dst_type, fail=True)
+        elif dst_type.assignable_from(PyrexTypes.c_char_ptr_type):
+            node.type = dst_type
             return node
 
         # We still need to perform normal coerce_to processing on the
@@ -1006,11 +1008,6 @@ class BytesNode(ConstNode):
         # in which case a type test node will be needed.
         return ConstNode.coerce_to(node, dst_type, env)
 
-    def as_py_string_node(self, env):
-        # Return a new BytesNode with the same value as this node
-        # but whose type is a Python type instead of a C type.
-        return BytesNode(self.pos, value = self.value, type = Builtin.bytes_type)
-
     def generate_evaluation_code(self, code):
         if self.type.is_pyobject:
             self.result_code = code.get_py_string_const(self.value)
@@ -2962,9 +2959,14 @@ class SimpleCallNode(CallNode):
                 arg = arg.coerce_to_temp(env)
             self.args[i] = arg
         for i in range(max_nargs, actual_nargs):
-            if self.args[i].type.is_pyobject:
-                error(self.args[i].pos,
-                    "Python object cannot be passed as a varargs parameter")
+            arg = self.args[i]
+            if arg.type.is_pyobject:
+                arg_ctype = arg.type.default_coerced_ctype()
+                if arg_ctype is None:
+                    error(self.args[i].pos,
+                          "Python object cannot be passed as a varargs parameter")
+                else:
+                    self.args[i] = arg.coerce_to(arg_ctype, env)
         # Calc result type and code fragment
         if isinstance(self.function, NewExprNode):
             self.type = PyrexTypes.CPtrType(self.function.class_type)
diff --git a/Cython/Compiler/PyrexTypes.py b/Cython/Compiler/PyrexTypes.py
index bde735672..39dd92030 100755
--- a/Cython/Compiler/PyrexTypes.py
+++ b/Cython/Compiler/PyrexTypes.py
@@ -353,6 +353,10 @@ class PyObjectType(PyrexType):
     def can_coerce_to_pyobject(self, env):
         return True
 
+    def default_coerced_ctype(self):
+        "The default C type that this Python type coerces to, or None."
+        return None
+
     def assignable_from(self, src_type):
         # except for pointers, conversion will be attempted
         return not src_type.is_ptr or src_type.is_string
@@ -403,7 +407,16 @@ class BuiltinObjectType(PyObjectType):
     
     def __repr__(self):
         return "<%s>"% self.cname
-        
+
+    def default_coerced_ctype(self):
+        if self.name == 'bytes':
+            return c_char_ptr_type
+        elif self.name == 'bool':
+            return c_bint_type
+        elif self.name == 'float':
+            return c_double_type
+        return None
+
     def assignable_from(self, src_type):
         if isinstance(src_type, BuiltinObjectType):
             return src_type.name == self.name
diff --git a/tests/errors/string_assignments.pyx b/tests/errors/string_assignments.pyx
index 1b5a13411..8cc2abf5a 100644
--- a/tests/errors/string_assignments.pyx
+++ b/tests/errors/string_assignments.pyx
@@ -58,18 +58,18 @@ _ERRORS = u"""
 30:22: Cannot convert Unicode string to 'bytes' implicitly, encoding required.
 31:22: Cannot convert 'str' to 'bytes' implicitly. This is not portable.
 
-33:17: Cannot assign type 'char *' to 'str object'
+33:17: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.
 34:19: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.
 35:17: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.
 36:19: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.
 
 38:20: str objects do not support coercion to unicode, use a unicode string literal instead (u'')
 39:22: str objects do not support coercion to unicode, use a unicode string literal instead (u'')
-40:20: Cannot assign type 'char *' to 'unicode object'
+40:20: Cannot convert 'bytes' object to unicode implicitly, decoding required
 41:22: Cannot convert 'bytes' object to unicode implicitly, decoding required
 42:22: Cannot convert 'char*' to unicode implicitly, decoding required
 
 44:19: Cannot assign type 'str object' to 'tuple object'
 45:18: Cannot assign type 'unicode object' to 'tuple object'
-46:18: Cannot assign type 'char *' to 'tuple object'
+46:18: Cannot assign type 'bytes object' to 'tuple object'
 """
diff --git a/tests/run/type_inference.pyx b/tests/run/type_inference.pyx
index 5ffe197dd..b411d5b59 100644
--- a/tests/run/type_inference.pyx
+++ b/tests/run/type_inference.pyx
@@ -25,7 +25,7 @@ def simple():
     xptrptr = &xptr
     assert typeof(xptrptr) == "double **", typeof(xptrptr)
     b = b"abc"
-    assert typeof(b) == "char *", typeof(b)
+    assert typeof(b) == "bytes object", typeof(b)
     s = "abc"
     assert typeof(s) == "str object", typeof(s)
     u = u"xyz"
@@ -57,7 +57,7 @@ def slicing():
     >>> slicing()
     """
     b = b"abc"
-    assert typeof(b) == "char *", typeof(b)
+    assert typeof(b) == "bytes object", typeof(b)
     b1 = b[1:2]
     assert typeof(b1) == "bytes object", typeof(b1)
     b2 = b[1:2:2]
@@ -92,9 +92,9 @@ def indexing():
     >>> indexing()
     """
     b = b"abc"
-    assert typeof(b) == "char *", typeof(b)
+    assert typeof(b) == "bytes object", typeof(b)
     b1 = b[1]
-    assert typeof(b1) == "char", typeof(b1)  # FIXME: Python object ??
+    assert typeof(b1) == "Python object", typeof(b1)
     u = u"xyz"
     assert typeof(u) == "unicode object", typeof(u)
     u1 = u[1]
-- 
2.30.9