Merge branch 'master' of github.com:cython/cython

d89dd2a9 · Robert Bradshaw · 8c531023 · e085197b · d89dd2a9 · d89dd2a9
Commit d89dd2a9 authored Oct 21, 2013 by Robert Bradshaw
27 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,9 @@ __pycache__
 *.so
 *.o

+*.egg
+*.egg-info
+
 Cython/Compiler/*.c
 Cython/Plex/*.c
 Cython/Runtime/refnanny.c

--- a/.hgignore
+++ b/.hgignore
@@ -3,6 +3,8 @@ syntax: glob
 *.pyc
 *.pyo
 __pycache__
+*.egg
+*.egg-info

 Cython/Compiler/*.c
 Cython/Plex/*.c

--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -8,6 +8,10 @@ Cython Changelog
 Features added
 --------------

+* Using ``cdef basestring stringvar`` and function arguments typed as
+  ``basestring`` is now meaningful and allows assigning exactly
+  ``str`` and ``unicode`` objects, but no subtypes of these types.
+
 * Support for the ``__debug__`` builtin.

 * Assertions in Cython compiled modules are disabled if the running
@@ -24,7 +28,7 @@ Features added
  to the cythonize() compilation function (including distutils build).

 * The new extension type decorator ``@cython.no_gc_clear`` prevents
-  the type from being cleared during cyclic garbage collection, thus
+  objects from being cleared during cyclic garbage collection, thus
  making sure that object attributes are kept alive until deallocation.

 * During cyclic garbage collection, attributes of extension types that
@@ -57,7 +61,7 @@ Other changes
  cleanup instead of ``tp_del()``.


-0.19.2 (??)
+0.19.2 (2013-10-13)
 ===================

 Features added
@@ -66,6 +70,13 @@ Features added
 Bugs fixed
 ----------

+* Some standard declarations were fixed or updated, including the previously
+  incorrect declaration of ``PyBuffer_FillInfo()`` and some missing bits in
+  ``libc.math``.
+
+* Heap allocated subtypes of ``type`` used the wrong base type struct at the
+  C level.
+
 * Calling the unbound method dict.keys/value/items() in dict subtypes could
  call the bound object method instead of the unbound supertype method.


--- a/Cython/Compiler/Builtin.py
+++ b/Cython/Compiler/Builtin.py
@@ -408,7 +408,7 @@ def init_builtins():
        '__debug__', PyrexTypes.c_const_type(PyrexTypes.c_bint_type),
        pos=None, cname='(!Py_OptimizeFlag)', is_cdef=True)
    global list_type, tuple_type, dict_type, set_type, frozenset_type
-    global bytes_type, str_type, unicode_type
+    global bytes_type, str_type, unicode_type, basestring_type
    global float_type, bool_type, type_type, complex_type
    type_type  = builtin_scope.lookup('type').type
    list_type  = builtin_scope.lookup('list').type
@@ -419,6 +419,7 @@ def init_builtins():
    bytes_type = builtin_scope.lookup('bytes').type
    str_type   = builtin_scope.lookup('str').type
    unicode_type = builtin_scope.lookup('unicode').type
+    basestring_type = builtin_scope.lookup('basestring').type
    float_type = builtin_scope.lookup('float').type
    bool_type  = builtin_scope.lookup('bool').type
    complex_type  = builtin_scope.lookup('complex').type

--- a/Cython/Compiler/ExprNodes.py
+++ b/Cython/Compiler/ExprNodes.py
@@ -67,7 +67,9 @@ coercion_error_dict = {
    (Builtin.unicode_type, PyrexTypes.c_uchar_ptr_type) : "Unicode objects only support coercion to Py_UNICODE*.",
    (Builtin.bytes_type, Builtin.unicode_type) : "Cannot convert 'bytes' object to unicode implicitly, decoding required",
    (Builtin.bytes_type, Builtin.str_type) : "Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.",
+    (Builtin.bytes_type, Builtin.basestring_type) : "Cannot convert 'bytes' object to basestring implicitly. This is not portable to Py3.",
    (Builtin.bytes_type, PyrexTypes.c_py_unicode_ptr_type) : "Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.",
+    (Builtin.basestring_type, Builtin.bytes_type) : "Cannot convert 'basestring' object to bytes implicitly. This is not portable.",
    (Builtin.str_type, Builtin.unicode_type) : "str objects do not support coercion to unicode, use a unicode string literal instead (u'')",
    (Builtin.str_type, Builtin.bytes_type) : "Cannot convert 'str' to 'bytes' implicitly. This is not portable.",
    (Builtin.str_type, PyrexTypes.c_char_ptr_type) : "'str' objects do not support coercion to C types (use 'bytes'?).",
@@ -76,6 +78,7 @@ coercion_error_dict = {
    (PyrexTypes.c_char_ptr_type, Builtin.unicode_type) : "Cannot convert 'char*' to unicode implicitly, decoding required",
    (PyrexTypes.c_uchar_ptr_type, Builtin.unicode_type) : "Cannot convert 'char*' to unicode implicitly, decoding required",
 }
+
 def find_coercion_error(type_tuple, default, env):
    err = coercion_error_dict.get(type_tuple)
    if err is None:
@@ -1250,9 +1253,8 @@ class UnicodeNode(ConstNode):
                  "Unicode literals do not support coercion to C types other "
                  "than Py_UNICODE/Py_UCS4 (for characters) or Py_UNICODE* "
                  "(for strings).")
-        elif dst_type is not py_object_type:
-            if not self.check_for_coercion_error(dst_type, env):
-                self.fail_assignment(dst_type)
+        elif dst_type not in (py_object_type, Builtin.basestring_type):
+            self.check_for_coercion_error(dst_type, env, fail=True)
        return self

    def can_coerce_to_char_literal(self):
@@ -1337,7 +1339,8 @@ class StringNode(PyConstNode):
 #                return BytesNode(self.pos, value=self.value)
            if not dst_type.is_pyobject:
                return BytesNode(self.pos, value=self.value).coerce_to(dst_type, env)
-            self.check_for_coercion_error(dst_type, env, fail=True)
+            if dst_type is not Builtin.basestring_type:
+                self.check_for_coercion_error(dst_type, env, fail=True)
        return self

    def can_coerce_to_char_literal(self):
@@ -6677,8 +6680,10 @@ class ClassNode(ExprNode, ModuleNameMixin):

        if self.doc:
            code.put_error_if_neg(self.pos,
-                'PyDict_SetItemString(%s, "__doc__", %s)' % (
+                'PyDict_SetItem(%s, %s, %s)' % (
                    self.dict.py_result(),
+                    code.intern_identifier(
+                        StringEncoding.EncodedString("__doc__")),
                    self.doc.py_result()))
        py_mod_name = self.get_py_mod_name(code)
        qualname = self.get_py_qualified_name(code)

--- a/Cython/Compiler/ModuleNode.py
+++ b/Cython/Compiler/ModuleNode.py
@@ -1252,8 +1252,10 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
                if base_type.scope and base_type.scope.needs_gc():
                    code.putln("PyObject_GC_Track(o);")
                else:
-                    code.putln("if (PyType_IS_GC(Py_TYPE(o)->tp_base))"
-                               " PyObject_GC_Track(o);")
+                    code.putln("#if CYTHON_COMPILING_IN_CPYTHON")
+                    code.putln("if (PyType_IS_GC(Py_TYPE(o)->tp_base))")
+                    code.putln("#endif")
+                    code.putln("PyObject_GC_Track(o);")

            tp_dealloc = TypeSlots.get_base_slot_function(scope, tp_slot)
            if tp_dealloc is not None:
@@ -2197,6 +2199,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
        code.putln('#if CYTHON_COMPILING_IN_PYPY')
        code.putln('Py_CLEAR(%s);' % Naming.builtins_cname)
        code.putln('#endif')
+        code.put_decref_clear(env.module_dict_cname, py_object_type,
+                              nanny=False, clear_before_decref=True)

    def generate_main_method(self, env, code):
        module_is_main = "%s%s" % (Naming.module_is_main, self.full_module_name.replace('.', '__'))

--- a/Cython/Compiler/Nodes.py
+++ b/Cython/Compiler/Nodes.py
@@ -1368,8 +1368,8 @@ class CEnumDefNode(StatNode):
                        item.cname,
                        code.error_goto_if_null(temp, item.pos)))
                code.put_gotref(temp)
-                code.putln('if (__Pyx_SetAttrString(%s, "%s", %s) < 0) %s' % (
-                        Naming.module_cname,
+                code.putln('if (PyDict_SetItemString(%s, "%s", %s) < 0) %s' % (
+                        Naming.moddict_cname,
                        item.name,
                        temp,
                        code.error_goto(item.pos)))

--- a/Cython/Compiler/Optimize.py
+++ b/Cython/Compiler/Optimize.py
@@ -285,6 +285,29 @@ class IterationTransform(Visitor.EnvTransform):
        exception_value = '-1')

    def _transform_unicode_iteration(self, node, slice_node, reversed=False):
+        if slice_node.is_literal:
+            # try to reduce to byte iteration for plain Latin-1 strings
+            try:
+                bytes_value = BytesLiteral(slice_node.value.encode('latin1'))
+            except UnicodeEncodeError:
+                pass
+            else:
+                bytes_slice = ExprNodes.SliceIndexNode(
+                    slice_node.pos,
+                    base=ExprNodes.BytesNode(
+                        slice_node.pos, value=bytes_value,
+                        constant_result=bytes_value,
+                        type=PyrexTypes.c_char_ptr_type).coerce_to(
+                            PyrexTypes.c_uchar_ptr_type, self.current_env()),
+                    start=None,
+                    stop=ExprNodes.IntNode(
+                        slice_node.pos, value=len(bytes_value),
+                        constant_result=len(bytes_value),
+                        type=PyrexTypes.c_py_ssize_t_type),
+                    type=Builtin.unicode_type,  # hint for Python conversion
+                )
+                return self._transform_carray_iteration(node, bytes_slice, reversed)
+
        unpack_temp_node = UtilNodes.LetRefNode(
            slice_node.as_none_safe_node("'NoneType' is not iterable"))

@@ -455,22 +478,32 @@ class IterationTransform(Visitor.EnvTransform):
        counter_temp = counter.ref(node.target.pos)

        if slice_base.type.is_string and node.target.type.is_pyobject:
-            # special case: char* -> bytes
-            target_value = ExprNodes.SliceIndexNode(
-                node.target.pos,
-                start=ExprNodes.IntNode(node.target.pos, value='0',
-                                        constant_result=0,
-                                        type=PyrexTypes.c_int_type),
-                stop=ExprNodes.IntNode(node.target.pos, value='1',
-                                       constant_result=1,
-                                       type=PyrexTypes.c_int_type),
-                base=counter_temp,
-                type=Builtin.bytes_type,
-                is_temp=1)
+            # special case: char* -> bytes/unicode
+            if slice_node.type is Builtin.unicode_type:
+                target_value = ExprNodes.CastNode(
+                    ExprNodes.DereferenceNode(
+                        node.target.pos, operand=counter_temp,
+                        type=ptr_type.base_type),
+                    PyrexTypes.c_py_ucs4_type).coerce_to(
+                        node.target.type, self.current_env())
+            else:
+                # char* -> bytes coercion requires slicing, not indexing
+                target_value = ExprNodes.SliceIndexNode(
+                    node.target.pos,
+                    start=ExprNodes.IntNode(node.target.pos, value='0',
+                                            constant_result=0,
+                                            type=PyrexTypes.c_int_type),
+                    stop=ExprNodes.IntNode(node.target.pos, value='1',
+                                           constant_result=1,
+                                           type=PyrexTypes.c_int_type),
+                    base=counter_temp,
+                    type=Builtin.bytes_type,
+                    is_temp=1)
        elif node.target.type.is_ptr and not node.target.type.assignable_from(ptr_type.base_type):
            # Allow iteration with pointer target to avoid copy.
            target_value = counter_temp
        else:
+            # TODO: can this safely be replaced with DereferenceNode() as above?
            target_value = ExprNodes.IndexNode(
                node.target.pos,
                index=ExprNodes.IntNode(node.target.pos, value='0',

--- a/Cython/Compiler/PyrexTypes.py
+++ b/Cython/Compiler/PyrexTypes.py
@@ -962,7 +962,10 @@ class BuiltinObjectType(PyObjectType):

    def assignable_from(self, src_type):
        if isinstance(src_type, BuiltinObjectType):
-            return src_type.name == self.name
+            if self.name == 'basestring':
+                return src_type.name in ('str', 'unicode', 'basestring')
+            else:
+                return src_type.name == self.name
        elif src_type.is_extension_type:
            # FIXME: This is an ugly special case that we currently
            # keep supporting.  It allows users to specify builtin
@@ -1005,7 +1008,15 @@ class BuiltinObjectType(PyObjectType):
        check = 'likely(%s(%s))' % (type_check, arg)
        if not notnone:
            check += '||((%s) == Py_None)' % arg
-        error = '(PyErr_Format(PyExc_TypeError, "Expected %s, got %%.200s", Py_TYPE(%s)->tp_name), 0)' % (self.name, arg)
+        if self.name == 'basestring':
+            name = '(PY_MAJOR_VERSION < 3 ? "basestring" : "str")'
+            space_for_name = 16
+        else:
+            name = '"%s"' % self.name
+            # avoid wasting too much space but limit number of different format strings
+            space_for_name = (len(self.name) // 16 + 1) * 16
+        error = '(PyErr_Format(PyExc_TypeError, "Expected %%.%ds, got %%.200s", %s, Py_TYPE(%s)->tp_name), 0)' % (
+            space_for_name, name, arg)
        return check + '||' + error

    def declaration_code(self, entity_code,

--- a/Cython/Debugger/Cygdb.py
+++ b/Cython/Debugger/Cygdb.py
@@ -62,7 +62,7 @@ def make_command_file(path_to_debug_info, prefix_code='', no_import=False):

    return tempfilename

-usage = "Usage: cygdb [options] [PATH [GDB_ARGUMENTS]]"
+usage = "Usage: cygdb [options] [PATH [-- GDB_ARGUMENTS]]"

 def main(path_to_debug_info=None, gdb_argv=None, no_import=False):
    """
@@ -82,12 +82,12 @@ def main(path_to_debug_info=None, gdb_argv=None, no_import=False):
    (options, args) = parser.parse_args()
    if path_to_debug_info is None:
        if len(args) > 1:
-            path_to_debug_info = args[1]
+            path_to_debug_info = args[0]
        else:
            path_to_debug_info = os.curdir

    if gdb_argv is None:
-        gdb_argv = args[2:]
+        gdb_argv = args[1:]

    if path_to_debug_info == '--':
        no_import = True

--- a/Cython/Includes/cpython/array.pxd
+++ b/Cython/Includes/cpython/array.pxd
@@ -46,13 +46,13 @@
              : 2012-05-02 andreasvc
              : (see revision control)
 """
-from libc cimport stdlib
 from libc.string cimport strcat, strncat, \
    memset, memchr, memcmp, memcpy, memmove

 from cpython.object cimport Py_SIZE
 from cpython.ref cimport PyTypeObject, Py_TYPE
 from cpython.exc cimport PyErr_BadArgument
+from cpython.mem cimport PyMem_Malloc, PyMem_Free

 cdef extern from *:  # Hard-coded utility code hack.
    ctypedef class array.array [object arrayobject]
@@ -82,56 +82,53 @@ cdef extern from *:  # Hard-coded utility code hack.

    ctypedef class array.array [object arrayobject]:
        cdef __cythonbufferdefaults__ = {'ndim' : 1, 'mode':'c'}
-        
+
        cdef:
            Py_ssize_t ob_size
            arraydescr* ob_descr    # struct arraydescr *ob_descr;
            __data_union data

-        def __getbuffer__(array self, Py_buffer* info, int flags):
+        def __getbuffer__(self, Py_buffer* info, int flags):
            # This implementation of getbuffer is geared towards Cython
            # requirements, and does not yet fullfill the PEP.
            # In particular strided access is always provided regardless
            # of flags
-            cdef unsigned rows, columns, itemsize
-            
+            item_count = Py_SIZE(self)
+
            info.suboffsets = NULL
            info.buf = self.data.as_chars
            info.readonly = 0
            info.ndim = 1
-            info.itemsize = itemsize = self.ob_descr.itemsize   # e.g. sizeof(float)
-            
-            info.strides = <Py_ssize_t*> \
-                           stdlib.malloc(sizeof(Py_ssize_t) * info.ndim * 2 + 2)
-            info.shape = info.strides + 1
-            info.shape[0] = Py_SIZE(self)            # number of items
-            info.strides[0] = info.itemsize
-
-            info.format = <char*>(info.strides + 2 * info.ndim)
+            info.itemsize = self.ob_descr.itemsize   # e.g. sizeof(float)
+            info.len = info.itemsize * item_count
+
+            info.shape = <Py_ssize_t*> PyMem_Malloc(sizeof(Py_ssize_t) + 2)
+            if not info.shape:
+                raise MemoryError()
+            info.shape[0] = item_count      # constant regardless of resizing
+            info.strides = &info.itemsize
+
+            info.format = <char*> (info.shape + 1)
            info.format[0] = self.ob_descr.typecode
            info.format[1] = 0
            info.obj = self

-        def __releasebuffer__(array self, Py_buffer* info):
-            #if PyArray_HASFIELDS(self):
-            #    stdlib.free(info.format)
-            #if sizeof(npy_intp) != sizeof(Py_ssize_t):
-            stdlib.free(info.strides)
-        
+        def __releasebuffer__(self, Py_buffer* info):
+            PyMem_Free(info.shape)
+
    array newarrayobject(PyTypeObject* type, Py_ssize_t size, arraydescr *descr)

    # fast resize/realloc
    # not suitable for small increments; reallocation 'to the point'
-    int resize(array self, Py_ssize_t n)
+    int resize(array self, Py_ssize_t n) except -1
    # efficient for small increments (not in Py2.3-)
-    int resize_smart(array self, Py_ssize_t n)
+    int resize_smart(array self, Py_ssize_t n) except -1


 cdef inline array clone(array template, Py_ssize_t length, bint zero):
    """ fast creation of a new array, given a template array.
    type will be same as template.
    if zero is true, new array will be initialized with zeroes."""
-    cdef array op
    op = newarrayobject(Py_TYPE(template), length, template.ob_descr)
    if zero and op is not None:
        memset(op.data.as_chars, 0, length * op.ob_descr.itemsize)
@@ -139,28 +136,26 @@ cdef inline array clone(array template, Py_ssize_t length, bint zero):

 cdef inline array copy(array self):
    """ make a copy of an array. """
-    cdef array op
    op = newarrayobject(Py_TYPE(self), Py_SIZE(self), self.ob_descr)
    memcpy(op.data.as_chars, self.data.as_chars, Py_SIZE(op) * op.ob_descr.itemsize)
    return op

-cdef inline int extend_buffer(array self, char* stuff, Py_ssize_t n):
+cdef inline int extend_buffer(array self, char* stuff, Py_ssize_t n) except -1:
    """ efficent appending of new stuff of same type
    (e.g. of same array type)
    n: number of elements (not number of bytes!) """
    cdef Py_ssize_t itemsize = self.ob_descr.itemsize
-    cdef Py_ssize_t orgsize = Py_SIZE(self)
-    if resize_smart(self, orgsize + n) == -1:
-        return -1
-    memcpy(self.data.as_chars + orgsize * itemsize, stuff, n * itemsize)
+    cdef Py_ssize_t origsize = Py_SIZE(self)
+    resize_smart(self, origsize + n)
+    memcpy(self.data.as_chars + origsize * itemsize, stuff, n * itemsize)
+    return 0

-cdef inline int extend(array self, array other):
+cdef inline int extend(array self, array other) except -1:
    """ extend array with data from another array; types must match. """
-    if self.ob_descr.typecode != self.ob_descr.typecode:
+    if self.ob_descr.typecode != other.ob_descr.typecode:
        PyErr_BadArgument()
-        return -1
    return extend_buffer(self, other.data.as_chars, Py_SIZE(other))

-cdef inline void zero(array op):
+cdef inline void zero(array self):
    """ set all elements of array to zero. """
-    memset(op.data.as_chars, 0, Py_SIZE(op) * op.ob_descr.itemsize)
+    memset(self.data.as_chars, 0, Py_SIZE(self) * self.ob_descr.itemsize)
--- a/Cython/Includes/cpython/buffer.pxd
+++ b/Cython/Includes/cpython/buffer.pxd
@@ -96,14 +96,14 @@ cdef extern from "Python.h":
    # (Fortran-style if fort is 'F' or C-style otherwise) array of the
    # given shape with the given number of bytes per element.

-    int PyBuffer_FillInfo(Py_buffer *view, void *buf,
-                          Py_ssize_t len, int readonly,
-                          int flags) except -1
+    int PyBuffer_FillInfo(Py_buffer *view, object exporter, void *buf,
+                          Py_ssize_t len, int readonly, int flags) except -1
    # Fill in a buffer-info structure, view, correctly for an exporter
    # that can only share a contiguous chunk of memory of “unsigned
    # bytes” of the given length. Return 0 on success and -1 (with
    # raising an error) on error.

+    # DEPRECATED HERE: do not cimport from here, cimport from cpython.object instead
    object PyObject_Format(object obj, object format_spec)
    # Takes an arbitrary object and returns the result of calling
    # obj.__format__(format_spec).
--- a/Cython/Includes/cpython/object.pxd
+++ b/Cython/Includes/cpython/object.pxd
@@ -285,3 +285,8 @@ cdef extern from "Python.h":
    # and returns NULL if the object cannot be iterated.

    Py_ssize_t Py_SIZE(object o)
+
+    object PyObject_Format(object obj, object format_spec)
+    # Takes an arbitrary object and returns the result of calling
+    # obj.__format__(format_spec).
+    # Added in Py2.6
--- a/Cython/Runtime/refnanny.pyx
+++ b/Cython/Runtime/refnanny.pyx
@@ -86,8 +86,7 @@ cdef PyObject* SetupContext(char* funcname, int lineno, char* filename) except N
        # In that case, we don't want to be doing anything fancy
        # like caching and resetting exceptions.
        return NULL
-    cdef PyObject* type = NULL, *value = NULL, *tb = NULL
-    cdef PyObject* result = NULL
+    cdef (PyObject*) type = NULL, value = NULL, tb = NULL, result = NULL
    PyThreadState_Get()
    PyErr_Fetch(&type, &value, &tb)
    try:
@@ -101,7 +100,7 @@ cdef PyObject* SetupContext(char* funcname, int lineno, char* filename) except N

 cdef void GOTREF(PyObject* ctx, PyObject* p_obj, int lineno):
    if ctx == NULL: return
-    cdef PyObject* type = NULL, *value = NULL, *tb = NULL
+    cdef (PyObject*) type = NULL, value = NULL, tb = NULL
    PyErr_Fetch(&type, &value, &tb)
    try:
        try:
@@ -118,7 +117,7 @@ cdef void GOTREF(PyObject* ctx, PyObject* p_obj, int lineno):

 cdef int GIVEREF_and_report(PyObject* ctx, PyObject* p_obj, int lineno):
    if ctx == NULL: return 1
-    cdef PyObject* type = NULL, *value = NULL, *tb = NULL
+    cdef (PyObject*) type = NULL, value = NULL, tb = NULL
    cdef bint decref_ok = False
    PyErr_Fetch(&type, &value, &tb)
    try:
@@ -150,7 +149,7 @@ cdef void DECREF(PyObject* ctx, PyObject* obj, int lineno):

 cdef void FinishContext(PyObject** ctx):
    if ctx == NULL or ctx[0] == NULL: return
-    cdef PyObject* type = NULL, *value = NULL, *tb = NULL
+    cdef (PyObject*) type = NULL, value = NULL, tb = NULL
    cdef object errors = None
    cdef Context context
    PyThreadState_Get()

--- a/Cython/Utility/Builtins.c
+++ b/Cython/Utility/Builtins.c
@@ -201,7 +201,7 @@ static PyObject* __Pyx_Intern(PyObject* s); /* proto */

 static PyObject* __Pyx_Intern(PyObject* s) {
    if (!(likely(PyString_CheckExact(s)))) {
-        PyErr_Format(PyExc_TypeError, "Expected str, got %s", Py_TYPE(s)->tp_name);
+        PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "str", Py_TYPE(s)->tp_name);
        return 0;
    }
    Py_INCREF(s);

--- a/Cython/Utility/FunctionArguments.c
+++ b/Cython/Utility/FunctionArguments.c
@@ -14,7 +14,10 @@ static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed
    }
    if (none_allowed && obj == Py_None) return 1;
    else if (exact) {
-        if (Py_TYPE(obj) == type) return 1;
+        if (likely(Py_TYPE(obj) == type)) return 1;
+        #if PY_MAJOR_VERSION == 2
+        else if ((type == &PyBaseString_Type) && __Pyx_PyBaseString_CheckExact(obj)) return 1;
+        #endif
    }
    else {
        if (PyObject_TypeCheck(obj, type)) return 1;

--- a/Cython/Utility/ModuleSetupCode.c
+++ b/Cython/Utility/ModuleSetupCode.c
@@ -186,7 +186,7 @@
 #else
  #define __Pyx_PyBaseString_Check(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj) || \
                                         PyString_Check(obj) || PyUnicode_Check(obj))
-  #define __Pyx_PyBaseString_CheckExact(obj) (Py_TYPE(obj) == &PyBaseString_Type)
+  #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
 #endif

 #if PY_VERSION_HEX < 0x02060000

--- a/Cython/Utility/TypeConversion.c
+++ b/Cython/Utility/TypeConversion.c
@@ -288,7 +288,7 @@ static {{struct_type_decl}} {{funcname}}(PyObject * o) {
    PyObject *value = NULL;

    if (!PyMapping_Check(o)) {
-        PyErr_Format(PyExc_TypeError, "Expected a mapping, not %s", o->ob_type->tp_name);
+        PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "a mapping", Py_TYPE(o)->tp_name);
        goto bad;
    }


--- a/docs/src/tutorial/memory_allocation.rst
+++ b/docs/src/tutorial/memory_allocation.rst
@@ -45,34 +45,58 @@ A very simple example of malloc usage is the following::
            # return the previously allocated memory to the system
            free(my_array)

-One important thing to remember is that blocks of memory obtained with malloc
-*must* be manually released with free when one is done with them or it won't
-be reclaimed until the python process exits. This is called a memory leak.
-If a chuck of memory needs a larger lifetime then can be managed by a
-``try..finally`` block, another helpful idiom is to tie its lifetime to a
-Python object to leverage the Python runtime's memory management, e.g.::
+Note that the C-API functions for allocating memory on the Python heap
+are generally preferred over the low-level C functions above as the
+memory they provide is actually accounted for in Python's internal
+memory management system.  They also have special optimisations for
+smaller memory blocks, which speeds up their allocation by avoiding
+costly operating system calls.
+
+The C-API functions can be found in the ``cpython.mem`` standard
+declarations file::
+
+    from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
+
+Their interface and usage are identical to those of the corresponding
+low-level C functions.
+
+One important thing to remember is that blocks of memory obtained with
+:c:func:`malloc` or :c:func:`PyMem_Malloc` *must* be manually released
+with a corresponding call to :c:func:`free` or :c:func:`PyMem_Free`
+when they are no longer used (and *must* always use the matching
+type of free function).  Otherwise, they won't be reclaimed until the
+Python process exits.  This is called a memory leak.
+
+If a chunk of memory needs a larger lifetime than can be managed by a
+``try..finally`` block, another helpful idiom is to tie its lifetime
+to a Python object to leverage the Python runtime's memory management,
+e.g.::

  cdef class SomeMemory:
-  
-      cdef doube* data
-      
-      def __init__(self, number):
+
+      cdef double* data
+
+      def __cinit__(self, number):
          # allocate some memory (filled with random data)
-          self.data = <double*> malloc(number * sizeof(double))
-          if self.data == NULL:
+          self.data = <double*> PyMem_Malloc(number * sizeof(double))
+          if not self.data:
              raise MemoryError()
-    
+
      def resize(self, new_number):
          # Allocates new_number * sizeof(double) bytes,
          # preserving the contents and making a best-effort to
          # re-use the original data location.
-          self.data = <double*> realloc(self.data, new_number * sizeof(double))
-          
+          mem = <double*> PyMem_Realloc(self.data, new_number * sizeof(double))
+          if not mem:
+              raise MemoryError()
+          # Only overwrite the pointer if the memory was really reallocated.
+          # On error (mem is NULL), the original memory has not been freed.
+          self.data = mem
+
      def __dealloc__(self, number):
-          if self.data != NULL:
-              free(self.data)
+          PyMem_Free(self.data)     # no-op if self.data is NULL

 It should be noted that Cython has special support for (multi-dimensional)
 arrays of simple types via NumPy and memory views which are more full featured
 and easier to work with than pointers while still retaining the speed/static
-typing benefits. 
\ No newline at end of file
+typing benefits.
--- a/docs/src/tutorial/strings.rst
+++ b/docs/src/tutorial/strings.rst
@@ -16,18 +16,23 @@ implicitly insert these encoding/decoding steps.
 Python string types in Cython code
 ----------------------------------

-Cython supports three Python string types: ``bytes``, ``str``
-and ``unicode``.  The ``str`` type is special in that it is the
-byte string in Python 2 and the Unicode string in Python 3 (for Cython
-code compiled with language level 2, i.e. the default).  Thus, in Python
-2, both ``bytes`` and ``str`` represent the byte string type,
-whereas in Python 3, ``str`` and ``unicode`` represent the Python
-Unicode string type.  The switch is made at C compile time, the Python
-version that is used to run Cython is not relevant.
-
-When compiling Cython code with language level 3, the ``str`` type
-is identified with exactly the Unicode string type at Cython compile time,
-i.e. it no does not identify with ``bytes`` when running in Python 2.
+Cython supports four Python string types: ``bytes``, ``str``,
+``unicode`` and ``basestring``.  The ``bytes`` and ``unicode`` types
+are the specific types known from normal Python 2.x (named ``bytes``
+and ``str`` in Python 3).
+
+The ``str`` type is special in that it is the byte string in Python 2
+and the Unicode string in Python 3 (for Cython code compiled with
+language level 2, i.e. the default).  That is, it always corresponds
+exactly to the type that the Python runtime itself calls ``str``.
+Thus, in Python 2, both ``bytes`` and ``str`` represent the byte string
+type, whereas in Python 3, both ``str`` and ``unicode`` represent the
+Python Unicode string type.  The switch is made at C compile time, the
+Python version that is used to run Cython is not relevant.
+
+When compiling Cython code with language level 3, the ``str`` type is
+identified with exactly the Unicode string type at Cython compile time,
+i.e. it does not identify with ``bytes`` when running in Python 2.

 Note that the ``str`` type is not compatible with the ``unicode``
 type in Python 2, i.e. you cannot assign a Unicode string to a variable
@@ -40,6 +45,17 @@ and users normally expect code to be able to work with both.  Code that
 only targets Python 3 can safely type variables and arguments as either
 ``bytes`` or ``unicode``.

+The ``basestring`` type represents both the types ``str`` and ``unicode``,
+i.e. all Python text string types in Python 2 and Python 3.  This can be
+used for typing text variables that normally contain Unicode text (at
+least in Python 3) but must additionally accept the ``str`` type in
+Python 2 for backwards compatibility reasons.  It is not compatible with
+the ``bytes`` type.  Its usage should be rare in normal Cython code as
+the generic ``object`` type (i.e. untyped code) will normally be good
+enough and has the additional advantage of supporting the assignment of
+string subtypes.  Support for the ``basestring`` type is new in Cython
+0.20.
+

 General notes about C strings
 -----------------------------

--- a/docs/src/userguide/limitations.rst
+++ b/docs/src/userguide/limitations.rst
@@ -20,9 +20,8 @@ Most of these things that fall more into the implementation details rather
 than semantics, and we may decide not to fix (or require a --pedantic flag to get).


-==========
-Nested tuple argument unpacking.
-==========
+Nested tuple argument unpacking
+===============================

 ::

@@ -32,9 +31,8 @@ Nested tuple argument unpacking.
 This was removed in Python 3.


-==========
 Inspect support
-==========
+===============

 While it is quite possible to emulate the interface of functions in
 Cython's own function type, and recent Cython releases have seen several
@@ -45,9 +43,8 @@ base class. This has a negative impact on code that uses inspect to
 inspect function objects, but would require a change to Python itself.


-==========
 Stack frames
-==========
+============

 Currently we generate fake tracebacks as part of exception propagation,
 but don't fill in locals and can't fill in co_code.
@@ -55,18 +52,15 @@ To be fully compatible, we would have to generate these stack frame objects at
 function call time (with a potential performance penalty).  We may have an
 option to enable this for debugging.

-==========
-Identity vs. equality for inferred literals.
-==========
-
-::
-    a = 1.0        # a inferred to be double
-    b = c = None   # a inferred to be type object
-    if some_runtime_expression:
-        b = a
-        c = a
-    print b is c   # py float created twice
-

+Identity vs. equality for inferred literals
+===========================================

+::

+    a = 1.0          # a inferred to be C type 'double'
+    b = c = None     # b and c inferred to be type 'object'
+    if some_runtime_expression:
+        b = a        # creates a new Python float object
+        c = a        # creates a new Python float object
+    print b is c     # most likely not the same object
--- a/tests/errors/string_assignments.pyx
+++ b/tests/errors/string_assignments.pyx
@@ -15,6 +15,9 @@ cdef char* c2 = b"abc"
 cdef bytes b2 = c1
 cdef char* c3 = b1

+cdef basestring bs1  =  "abc"
+cdef basestring bs2  = u"abc"
+
 cdef object o1  =  "abc"
 cdef object o2  = b"abc"
 cdef object o3  = u"abc"
@@ -24,6 +27,10 @@ o5 = b1
 o6 = s1
 o7 = u1
 o8 = cu1
+o9 = bs1
+
+u1 = bs1
+s1 = bs1

 # errors:
 cdef char* c_f1   = u"abc"
@@ -38,6 +45,7 @@ cdef Py_UNICODE* cu_f4 = b"abc"
 cdef bytes b_f1   = u"abc"
 cdef bytes b_f2   = u1
 cdef bytes b_f3   = s1
+cdef bytes b_f4   = bs1

 cdef str s_f1  = b"abc"
 cdef str s_f2  = b1
@@ -50,6 +58,9 @@ cdef unicode u_f3 = b"abc"
 cdef unicode u_f4 = b1
 cdef unicode u_f5 = c1

+cdef basestring bs_f1 = b"abc"
+cdef basestring bs_f2 = b1
+
 cdef tuple t_f1 =  "abc"
 cdef tuple t_f2 = u"abc"
 cdef tuple t_f3 = b"abc"
@@ -64,36 +75,40 @@ print <unicode>c1
 print <unicode>c1[1:2]

 _ERRORS = u"""
-29:20: Unicode literals do not support coercion to C types other than Py_UNICODE/Py_UCS4 (for characters) or Py_UNICODE* (for strings).
-30:22: Unicode objects only support coercion to Py_UNICODE*.
-31:22: 'str' objects do not support coercion to C types (use 'bytes'?).
-
-33:27: Cannot assign type 'char *' to 'Py_UNICODE *'
-34:27: Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.
-35:27: 'str' objects do not support coercion to C types (use 'unicode'?).
-36:25: Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.
-
-38:20: Cannot convert Unicode string to 'bytes' implicitly, encoding required.
-39:22: Cannot convert Unicode string to 'bytes' implicitly, encoding required.
-40:22: Cannot convert 'str' to 'bytes' implicitly. This is not portable.
-
-42:17: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.
-43:19: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.
-44:17: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.
-45:19: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.
-
-47:20: str objects do not support coercion to unicode, use a unicode string literal instead (u'')
-48:22: str objects do not support coercion to unicode, use a unicode string literal instead (u'')
-49:20: Cannot convert 'bytes' object to unicode implicitly, decoding required
-50:22: Cannot convert 'bytes' object to unicode implicitly, decoding required
-51:22: Cannot convert 'char*' to unicode implicitly, decoding required
-
-53:19: Cannot assign type 'str object' to 'tuple object'
-54:18: Cannot assign type 'unicode object' to 'tuple object'
-55:18: Cannot assign type 'bytes object' to 'tuple object'
-
-61:13: default encoding required for conversion from 'char *' to 'str object'
-62:13: default encoding required for conversion from 'char *' to 'str object'
-63:17: Cannot convert 'char*' to unicode implicitly, decoding required
-64:17: default encoding required for conversion from 'char *' to 'unicode object'
+36:20: Unicode literals do not support coercion to C types other than Py_UNICODE/Py_UCS4 (for characters) or Py_UNICODE* (for strings).
+37:22: Unicode objects only support coercion to Py_UNICODE*.
+38:22: 'str' objects do not support coercion to C types (use 'bytes'?).
+
+40:27: Cannot assign type 'char *' to 'Py_UNICODE *'
+41:27: Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.
+42:27: 'str' objects do not support coercion to C types (use 'unicode'?).
+43:25: Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.
+
+45:20: Cannot convert Unicode string to 'bytes' implicitly, encoding required.
+46:22: Cannot convert Unicode string to 'bytes' implicitly, encoding required.
+47:22: Cannot convert 'str' to 'bytes' implicitly. This is not portable.
+48:23: Cannot convert 'basestring' object to bytes implicitly. This is not portable.
+
+50:17: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.
+51:19: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.
+52:17: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.
+53:19: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.
+
+55:20: str objects do not support coercion to unicode, use a unicode string literal instead (u'')
+56:22: str objects do not support coercion to unicode, use a unicode string literal instead (u'')
+57:20: Cannot convert 'bytes' object to unicode implicitly, decoding required
+58:22: Cannot convert 'bytes' object to unicode implicitly, decoding required
+59:22: Cannot convert 'char*' to unicode implicitly, decoding required
+
+61:24: Cannot convert 'bytes' object to basestring implicitly. This is not portable to Py3.
+62:26: Cannot convert 'bytes' object to basestring implicitly. This is not portable to Py3.
+
+64:19: Cannot assign type 'str object' to 'tuple object'
+65:18: Cannot assign type 'unicode object' to 'tuple object'
+66:18: Cannot assign type 'bytes object' to 'tuple object'
+
+72:13: default encoding required for conversion from 'char *' to 'str object'
+73:13: default encoding required for conversion from 'char *' to 'str object'
+74:17: Cannot convert 'char*' to unicode implicitly, decoding required
+75:17: default encoding required for conversion from 'char *' to 'unicode object'
 """
--- a/tests/run/builtin_basestring.pyx
+++ b/tests/run/builtin_basestring.pyx
@@ -37,3 +37,50 @@ def unicode_subtypes_basestring():
    True
    """
    return issubclass(unicode, basestring)
+
+
+def basestring_typed_variable(obj):
+    """
+    >>> basestring_typed_variable(None) is None
+    True
+    >>> basestring_typed_variable(ustring) is ustring
+    True
+    >>> basestring_typed_variable(sstring) is sstring
+    True
+    >>> if IS_PY3: print(True)
+    ... else: print(basestring_typed_variable(bstring) is bstring)
+    True
+    >>> class S(str): pass
+    >>> basestring_typed_variable(S())   # doctest: +ELLIPSIS
+    Traceback (most recent call last):
+    TypeError: ...got S...
+    """
+    cdef basestring s
+    s = u'abc'
+    assert s
+    s = 'abc'
+    assert s
+    # make sure coercion also works in conditional expressions
+    s = u'abc' if obj else 'abc'
+    assert s
+    s = obj
+    return s
+
+
+def basestring_typed_argument(basestring obj):
+    """
+    >>> basestring_typed_argument(None) is None
+    True
+    >>> basestring_typed_argument(ustring) is ustring
+    True
+    >>> basestring_typed_argument(sstring) is sstring
+    True
+    >>> if IS_PY3: print(True)
+    ... else: print(basestring_typed_argument(bstring) is bstring)
+    True
+    >>> class S(str): pass
+    >>> basestring_typed_argument(S())   # doctest: +ELLIPSIS
+    Traceback (most recent call last):
+    TypeError: ...got S...
+    """
+    return obj
--- a/tests/run/cython3.pyx
+++ b/tests/run/cython3.pyx
@@ -291,7 +291,7 @@ def loop_over_unicode_literal():
    """
    # Py_UCS4 can represent any Unicode character
    for uchar in 'abcdefg':
-        pass
+        assert uchar in 'abcdefg'
    return cython.typeof(uchar)

 def list_comp():

--- a/tests/run/py_ucs4_type.pyx
+++ b/tests/run/py_ucs4_type.pyx
@@ -209,14 +209,31 @@ def count_lower_case_characters_slice_reversed(unicode ustring):
             count += 1
    return count

+def loop_object_over_latin1_unicode_literal():
+    """
+    >>> result = loop_object_over_latin1_unicode_literal()
+    >>> print(result[:-1])
+    abcdefg
+    >>> ord(result[-1]) == 0xD7
+    True
+    """
+    cdef object uchar
+    chars = []
+    for uchar in u'abcdefg\xD7':
+        chars.append(uchar)
+    return u''.join(chars)
+
 def loop_object_over_unicode_literal():
    """
-    >>> print(loop_object_over_unicode_literal())
+    >>> result = loop_object_over_unicode_literal()
+    >>> print(result[:-1])
    abcdefg
+    >>> ord(result[-1]) == 0xF8FD
+    True
    """
    cdef object uchar
    chars = []
-    for uchar in u'abcdefg':
+    for uchar in u'abcdefg\uF8FD':
        chars.append(uchar)
    return u''.join(chars)


--- a/tests/run/pyarray.pyx
+++ b/tests/run/pyarray.pyx
@@ -147,8 +147,15 @@ def test_extend():
    """
    cdef array.array ca = array.array('i', [1, 2, 3])
    cdef array.array cb = array.array('i', [4, 5])
+    cdef array.array cf = array.array('f', [1.0, 2.0, 3.0])
    array.extend(ca, cb)
    assert list(ca) == [1, 2, 3, 4, 5], list(ca)
+    try:
+        array.extend(ca, cf)
+    except TypeError:
+        pass
+    else:
+        assert False, 'extending incompatible array types did not raise'

 def test_likes(a):
    """

--- a/tests/run/struct_conversion.pyx
+++ b/tests/run/struct_conversion.pyx
@@ -87,7 +87,7 @@ def test_obj_to_struct(MyStruct mystruct):
    >>> test_obj_to_struct(None)
    Traceback (most recent call last):
       ...
-    TypeError: Expected a mapping, not NoneType
+    TypeError: Expected a mapping, got NoneType
    >>> test_obj_to_struct(dict(s=b"world"))
    Traceback (most recent call last):
       ...