Commit 6900b530 authored by Stefan Behnel

support 'bytearray' in the same way as 'bytes', starting with Py2.6

parent 3f6a6967
...@@ -8,6 +8,9 @@ Cython Changelog ...@@ -8,6 +8,9 @@ Cython Changelog
Features added Features added
-------------- --------------
* ``bytearray`` has become a known type and supports coercion from and
to C strings.
* Using ``cdef basestring stringvar`` and function arguments typed as * Using ``cdef basestring stringvar`` and function arguments typed as
``basestring`` is now meaningful and allows assigning exactly ``basestring`` is now meaningful and allows assigning exactly
``str`` and ``unicode`` objects, but no subtypes of these types. ``str`` and ``unicode`` objects, but no subtypes of these types.
......
...@@ -271,6 +271,7 @@ builtin_types_table = [ ...@@ -271,6 +271,7 @@ builtin_types_table = [
]), ]),
("basestring", "PyBaseString_Type", []), ("basestring", "PyBaseString_Type", []),
("bytearray", "PyByteArray_Type", []),
("bytes", "PyBytes_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"), ("bytes", "PyBytes_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
]), ]),
("str", "PyString_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"), ("str", "PyString_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
...@@ -409,7 +410,7 @@ def init_builtins(): ...@@ -409,7 +410,7 @@ def init_builtins():
pos=None, cname='(!Py_OptimizeFlag)', is_cdef=True) pos=None, cname='(!Py_OptimizeFlag)', is_cdef=True)
global list_type, tuple_type, dict_type, set_type, frozenset_type global list_type, tuple_type, dict_type, set_type, frozenset_type
global bytes_type, str_type, unicode_type, basestring_type global bytes_type, str_type, unicode_type, basestring_type
global float_type, bool_type, type_type, complex_type global float_type, bool_type, type_type, complex_type, bytearray_type
type_type = builtin_scope.lookup('type').type type_type = builtin_scope.lookup('type').type
list_type = builtin_scope.lookup('list').type list_type = builtin_scope.lookup('list').type
tuple_type = builtin_scope.lookup('tuple').type tuple_type = builtin_scope.lookup('tuple').type
...@@ -420,6 +421,7 @@ def init_builtins(): ...@@ -420,6 +421,7 @@ def init_builtins():
str_type = builtin_scope.lookup('str').type str_type = builtin_scope.lookup('str').type
unicode_type = builtin_scope.lookup('unicode').type unicode_type = builtin_scope.lookup('unicode').type
basestring_type = builtin_scope.lookup('basestring').type basestring_type = builtin_scope.lookup('basestring').type
bytearray_type = builtin_scope.lookup('bytearray').type
float_type = builtin_scope.lookup('float').type float_type = builtin_scope.lookup('float').type
bool_type = builtin_scope.lookup('bool').type bool_type = builtin_scope.lookup('bool').type
complex_type = builtin_scope.lookup('complex').type complex_type = builtin_scope.lookup('complex').type
......
...@@ -10,7 +10,8 @@ cython.declare(error=object, warning=object, warn_once=object, InternalError=obj ...@@ -10,7 +10,8 @@ cython.declare(error=object, warning=object, warn_once=object, InternalError=obj
list_type=object, tuple_type=object, set_type=object, dict_type=object, list_type=object, tuple_type=object, set_type=object, dict_type=object,
unicode_type=object, str_type=object, bytes_type=object, type_type=object, unicode_type=object, str_type=object, bytes_type=object, type_type=object,
Builtin=object, Symtab=object, Utils=object, find_coercion_error=object, Builtin=object, Symtab=object, Utils=object, find_coercion_error=object,
debug_disposal_code=object, debug_temp_alloc=object, debug_coercion=object) debug_disposal_code=object, debug_temp_alloc=object, debug_coercion=object,
bytearray_type=object)
import sys import sys
import copy import copy
...@@ -28,7 +29,7 @@ from PyrexTypes import py_object_type, c_long_type, typecast, error_type, \ ...@@ -28,7 +29,7 @@ from PyrexTypes import py_object_type, c_long_type, typecast, error_type, \
unspecified_type unspecified_type
import TypeSlots import TypeSlots
from Builtin import list_type, tuple_type, set_type, dict_type, \ from Builtin import list_type, tuple_type, set_type, dict_type, \
unicode_type, str_type, bytes_type, type_type unicode_type, str_type, bytes_type, bytearray_type, type_type
import Builtin import Builtin
import Symtab import Symtab
from Cython import Utils from Cython import Utils
...@@ -3674,8 +3675,9 @@ class SliceIndexNode(ExprNode): ...@@ -3674,8 +3675,9 @@ class SliceIndexNode(ExprNode):
def coerce_to(self, dst_type, env): def coerce_to(self, dst_type, env):
if ((self.base.type.is_string or self.base.type.is_cpp_string) if ((self.base.type.is_string or self.base.type.is_cpp_string)
and dst_type in (bytes_type, str_type, unicode_type)): and dst_type in (bytes_type, bytearray_type, str_type, unicode_type)):
if dst_type is not bytes_type and not env.directives['c_string_encoding']: if (dst_type not in (bytes_type, bytearray_type)
and not env.directives['c_string_encoding']):
error(self.pos, error(self.pos,
"default encoding required for conversion from '%s' to '%s'" % "default encoding required for conversion from '%s' to '%s'" %
(self.base.type, dst_type)) (self.base.type, dst_type))
...@@ -3696,11 +3698,15 @@ class SliceIndexNode(ExprNode): ...@@ -3696,11 +3698,15 @@ class SliceIndexNode(ExprNode):
base_result = self.base.result() base_result = self.base.result()
if self.base.type != PyrexTypes.c_char_ptr_type: if self.base.type != PyrexTypes.c_char_ptr_type:
base_result = '((const char*)%s)' % base_result base_result = '((const char*)%s)' % base_result
if self.type is bytearray_type:
type_name = 'ByteArray'
else:
type_name = self.type.name.title()
if self.stop is None: if self.stop is None:
code.putln( code.putln(
"%s = __Pyx_Py%s_FromString(%s + %s); %s" % ( "%s = __Pyx_Py%s_FromString(%s + %s); %s" % (
result, result,
self.type.name.title(), type_name,
base_result, base_result,
start_code, start_code,
code.error_goto_if_null(result, self.pos))) code.error_goto_if_null(result, self.pos)))
...@@ -3708,7 +3714,7 @@ class SliceIndexNode(ExprNode): ...@@ -3708,7 +3714,7 @@ class SliceIndexNode(ExprNode):
code.putln( code.putln(
"%s = __Pyx_Py%s_FromStringAndSize(%s + %s, %s - %s); %s" % ( "%s = __Pyx_Py%s_FromStringAndSize(%s + %s, %s - %s); %s" % (
result, result,
self.type.name.title(), type_name,
base_result, base_result,
start_code, start_code,
stop_code, stop_code,
...@@ -10289,7 +10295,8 @@ class CoerceToPyTypeNode(CoercionNode): ...@@ -10289,7 +10295,8 @@ class CoerceToPyTypeNode(CoercionNode):
elif arg.type.is_complex: elif arg.type.is_complex:
self.type = Builtin.complex_type self.type = Builtin.complex_type
elif arg.type.is_string or arg.type.is_cpp_string: elif arg.type.is_string or arg.type.is_cpp_string:
if type is not bytes_type and not env.directives['c_string_encoding']: if (type not in (bytes_type, bytearray_type)
and not env.directives['c_string_encoding']):
error(arg.pos, error(arg.pos,
"default encoding required for conversion from '%s' to '%s'" % "default encoding required for conversion from '%s' to '%s'" %
(arg.type, type)) (arg.type, type))
...@@ -10335,9 +10342,11 @@ class CoerceToPyTypeNode(CoercionNode): ...@@ -10335,9 +10342,11 @@ class CoerceToPyTypeNode(CoercionNode):
funccall = arg_type.get_to_py_function(self.env, self.arg) funccall = arg_type.get_to_py_function(self.env, self.arg)
else: else:
func = arg_type.to_py_function func = arg_type.to_py_function
if ((arg_type.is_string or arg_type.is_cpp_string) if arg_type.is_string or arg_type.is_cpp_string:
and self.type in (bytes_type, str_type, unicode_type)): if self.type in (bytes_type, str_type, unicode_type):
func = func.replace("Object", self.type.name.title()) func = func.replace("Object", self.type.name.title())
elif self.type is bytearray_type:
func = func.replace("Object", "ByteArray")
funccall = "%s(%s)" % (func, self.arg.result()) funccall = "%s(%s)" % (func, self.arg.result())
code.putln('%s = %s; %s' % ( code.putln('%s = %s; %s' % (
......
...@@ -952,7 +952,7 @@ class BuiltinObjectType(PyObjectType): ...@@ -952,7 +952,7 @@ class BuiltinObjectType(PyObjectType):
return "<%s>"% self.cname return "<%s>"% self.cname
def default_coerced_ctype(self): def default_coerced_ctype(self):
if self.name == 'bytes': if self.name in ('bytes', 'bytearray'):
return c_char_ptr_type return c_char_ptr_type
elif self.name == 'bool': elif self.name == 'bool':
return c_bint_type return c_bint_type
...@@ -992,6 +992,8 @@ class BuiltinObjectType(PyObjectType): ...@@ -992,6 +992,8 @@ class BuiltinObjectType(PyObjectType):
type_check = 'PyString_Check' type_check = 'PyString_Check'
elif type_name == 'basestring': elif type_name == 'basestring':
type_check = '__Pyx_PyBaseString_Check' type_check = '__Pyx_PyBaseString_Check'
elif type_name == 'bytearray':
type_check = 'PyByteArray_Check'
elif type_name == 'frozenset': elif type_name == 'frozenset':
type_check = 'PyFrozenSet_Check' type_check = 'PyFrozenSet_Check'
else: else:
......
...@@ -941,6 +941,7 @@ class BuiltinScope(Scope): ...@@ -941,6 +941,7 @@ class BuiltinScope(Scope):
"complex":["((PyObject*)&PyComplex_Type)", py_object_type], "complex":["((PyObject*)&PyComplex_Type)", py_object_type],
"bytes": ["((PyObject*)&PyBytes_Type)", py_object_type], "bytes": ["((PyObject*)&PyBytes_Type)", py_object_type],
"bytearray": ["((PyObject*)&PyByteArray_Type)", py_object_type],
"str": ["((PyObject*)&PyString_Type)", py_object_type], "str": ["((PyObject*)&PyString_Type)", py_object_type],
"unicode":["((PyObject*)&PyUnicode_Type)", py_object_type], "unicode":["((PyObject*)&PyUnicode_Type)", py_object_type],
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*); static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*);
static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
/* Build a bytearray object from a C string. The FromString variant measures the input with strlen(), so its argument `s` is expanded twice and must be a side-effect free expression; the FromStringAndSize variant forwards to the C-API function unchanged. */
#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize(s, strlen(s))
#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize(s, l)
#define __Pyx_PyBytes_FromString PyBytes_FromString #define __Pyx_PyBytes_FromString PyBytes_FromString
#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize #define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*); static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*);
...@@ -20,6 +22,7 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*); ...@@ -20,6 +22,7 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*);
#define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s)) #define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s))
#define __Pyx_PyObject_FromUString(s) __Pyx_PyObject_FromString((char*)s) #define __Pyx_PyObject_FromUString(s) __Pyx_PyObject_FromString((char*)s)
#define __Pyx_PyBytes_FromUString(s) __Pyx_PyBytes_FromString((char*)s) #define __Pyx_PyBytes_FromUString(s) __Pyx_PyBytes_FromString((char*)s)
#define __Pyx_PyByteArray_FromUString(s) __Pyx_PyByteArray_FromString((char*)s)
#define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((char*)s) #define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((char*)s)
#define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s) #define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s)
...@@ -190,10 +193,17 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_ ...@@ -190,10 +193,17 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_
#endif /* PY_VERSION_HEX < 0x03030000 */ #endif /* PY_VERSION_HEX < 0x03030000 */
} else } else
#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT */ #endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT */
#if PY_VERSION_HEX >= 0x02060000
if (PyByteArray_Check(o)) {
*length = PyByteArray_GET_SIZE(o);
return PyByteArray_AS_STRING(o);
} else
#endif
{ {
char* result; char* result;
int r = PyBytes_AsStringAndSize(o, &result, length); int r = PyBytes_AsStringAndSize(o, &result, length);
if (r < 0) { if (unlikely(r < 0)) {
return NULL; return NULL;
} else { } else {
return result; return result;
......
...@@ -19,7 +19,9 @@ Python string types in Cython code ...@@ -19,7 +19,9 @@ Python string types in Cython code
Cython supports four Python string types: ``bytes``, ``str``, Cython supports four Python string types: ``bytes``, ``str``,
``unicode`` and ``basestring``. The ``bytes`` and ``unicode`` types ``unicode`` and ``basestring``. The ``bytes`` and ``unicode`` types
are the specific types known from normal Python 2.x (named ``bytes`` are the specific types known from normal Python 2.x (named ``bytes``
and ``str`` in Python 3). and ``str`` in Python 3). Cython additionally supports the
``bytearray`` type starting with Python 2.6. It behaves like the
``bytes`` type, except that it is mutable.
The ``str`` type is special in that it is the byte string in Python 2 The ``str`` type is special in that it is the byte string in Python 2
and the Unicode string in Python 3 (for Cython code compiled with and the Unicode string in Python 3 (for Cython code compiled with
...@@ -161,6 +163,13 @@ however, when the C function stores the pointer for later use. Apart ...@@ -161,6 +163,13 @@ however, when the C function stores the pointer for later use. Apart
from keeping a Python reference to the string object, no manual memory from keeping a Python reference to the string object, no manual memory
management is required. management is required.
Starting with Cython 0.20, the ``bytearray`` type is supported and
coerces in the same way as the ``bytes`` type. However, when using it
in a C context, special care must be taken not to grow or shrink the
object buffer after converting it to a C string pointer. These
modifications can change the internal buffer address, which will make
the pointer invalid.
Dealing with "const" Dealing with "const"
-------------------- --------------------
......
...@@ -235,6 +235,7 @@ VER_DEP_MODULES = { ...@@ -235,6 +235,7 @@ VER_DEP_MODULES = {
'run.pure_py', # decorators, with statement 'run.pure_py', # decorators, with statement
'run.purecdef', 'run.purecdef',
'run.struct_conversion', 'run.struct_conversion',
'run.bytearray_coercion',
# memory views require buffer protocol # memory views require buffer protocol
'memoryview.relaxed_strides', 'memoryview.relaxed_strides',
'memoryview.cythonarray', 'memoryview.cythonarray',
......
# mode: run
# NOTE: Py2.6+ only
# Round-trip check: the argument is received as a C ``char*`` and handed back
# through the declared ``bytearray`` return type.
# The doctest idiom ``<check> or <value>`` prints True on success and shows the
# offending value otherwise.
cpdef bytearray coerce_to_charptr(char* b):
"""
>>> b = bytearray(b'abc')
>>> coerced = coerce_to_charptr(b)
>>> coerced == b or coerced
True
>>> isinstance(coerced, bytearray) or type(coerced)
True
"""
# Returning the char* from a function typed ``bytearray`` exercises the
# C string -> bytearray coercion.
return b
# Checks that a ``bytearray`` argument can be assigned to all three C char
# pointer flavours and that each pointer converts back to an equal bytearray.
def coerce_to_charptrs(bytearray b):
"""
>>> b = bytearray(b'abc')
>>> coerce_to_charptrs(b)
True
"""
# bytearray -> plain, unsigned and signed char pointers.
# NOTE(review): presumably these point into b's own buffer, so b must not be
# resized while they are in use - confirm against the string tutorial.
cdef char* cs = b
cdef unsigned char* ucs = b
cdef signed char* scs = b
# Chained comparison: every ``<bytearray>`` conversion must compare equal to
# its neighbour, starting from the original object.
return b == <bytearray>cs == <bytearray> ucs == <bytearray>scs
# Checks that slicing a C ``char*`` can produce a ``bytearray`` result: the
# doctest expects the first two bytes of the input, typed as bytearray.
cpdef bytearray coerce_charptr_slice(char* b):
"""
>>> b = bytearray(b'abc')
>>> coerced = coerce_charptr_slice(b)
>>> coerced == b[:2] or coerced
True
>>> isinstance(coerced, bytearray) or type(coerced)
True
"""
# The slice of the char* is coerced to the declared ``bytearray`` return type.
return b[:2]
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment