Commit 8524b21f authored by Stefan Behnel

support bytearray as auto encoding string type

--HG--
rename : tests/run/str_ascii_auto_encoding.pyx => tests/run/bytearray_ascii_auto_encoding.pyx
rename : tests/run/str_default_auto_encoding.pyx => tests/run/bytearray_default_auto_encoding.pyx
parent 1bd61835
...@@ -9,7 +9,8 @@ Features added ...@@ -9,7 +9,8 @@ Features added
-------------- --------------
* ``bytearray`` has become a known type and supports coercion from and * ``bytearray`` has become a known type and supports coercion from and
to C strings. Indexing, slicing and decoding is optimised. to C strings. Indexing, slicing and decoding is optimised. Note that
this may have an impact on existing code due to type inference.
* Using ``cdef basestring stringvar`` and function arguments typed as * Using ``cdef basestring stringvar`` and function arguments typed as
``basestring`` is now meaningful and allows assigning exactly ``basestring`` is now meaningful and allows assigning exactly
......
...@@ -99,6 +99,7 @@ def find_coercion_error(type_tuple, default, env): ...@@ -99,6 +99,7 @@ def find_coercion_error(type_tuple, default, env):
def default_str_type(env): def default_str_type(env):
return { return {
'bytes': bytes_type, 'bytes': bytes_type,
'bytearray': bytearray_type,
'str': str_type, 'str': str_type,
'unicode': unicode_type 'unicode': unicode_type
}.get(env.directives['c_string_type']) }.get(env.directives['c_string_type'])
......
...@@ -580,16 +580,20 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -580,16 +580,20 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
c_string_type = env.directives['c_string_type'] c_string_type = env.directives['c_string_type']
c_string_encoding = env.directives['c_string_encoding'] c_string_encoding = env.directives['c_string_encoding']
if c_string_type != 'bytes' and not c_string_encoding: if c_string_type not in ('bytes', 'bytearray') and not c_string_encoding:
error(self.pos, "a default encoding must be provided if c_string_type != bytes") error(self.pos, "a default encoding must be provided if c_string_type is not a byte type")
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s' % int(c_string_encoding == 'ascii')) code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s' % int(c_string_encoding == 'ascii'))
if c_string_encoding == 'default': if c_string_encoding == 'default':
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 1') code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 1')
else: else:
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0') code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0')
code.putln('#define __PYX_DEFAULT_STRING_ENCODING "%s"' % c_string_encoding) code.putln('#define __PYX_DEFAULT_STRING_ENCODING "%s"' % c_string_encoding)
code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_type.title()) if c_string_type == 'bytearray':
code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_type.title()) c_string_func_name = 'ByteArray'
else:
c_string_func_name = c_string_type.title()
code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_func_name)
code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_func_name)
code.put(UtilityCode.load_as_string("TypeConversions", "TypeConversion.c")[0]) code.put(UtilityCode.load_as_string("TypeConversions", "TypeConversion.c")[0])
# These utility functions are assumed to exist and used elsewhere. # These utility functions are assumed to exist and used elsewhere.
......
...@@ -208,7 +208,7 @@ directive_types = { ...@@ -208,7 +208,7 @@ directive_types = {
'returns' : type, 'returns' : type,
'set_initial_path': str, 'set_initial_path': str,
'freelist': int, 'freelist': int,
'c_string_type': one_of('bytes', 'str', 'unicode'), 'c_string_type': one_of('bytes', 'bytearray', 'str', 'unicode'),
'c_string_encoding': normalise_encoding_name, 'c_string_encoding': normalise_encoding_name,
} }
...@@ -254,11 +254,13 @@ def parse_directive_value(name, value, relaxed_bool=False): ...@@ -254,11 +254,13 @@ def parse_directive_value(name, value, relaxed_bool=False):
'str' 'str'
>>> parse_directive_value('c_string_type', 'bytes') >>> parse_directive_value('c_string_type', 'bytes')
'bytes' 'bytes'
>>> parse_directive_value('c_string_type', 'bytearray')
'bytearray'
>>> parse_directive_value('c_string_type', 'unicode') >>> parse_directive_value('c_string_type', 'unicode')
'unicode' 'unicode'
>>> parse_directive_value('c_string_type', 'unnicode') >>> parse_directive_value('c_string_type', 'unnicode')
Traceback (most recent call last): Traceback (most recent call last):
ValueError: c_string_type directive must be one of ('bytes', 'str', 'unicode'), got 'unnicode' ValueError: c_string_type directive must be one of ('bytes', 'bytearray', 'str', 'unicode'), got 'unnicode'
""" """
type = directive_types.get(name) type = directive_types.get(name)
if not type: return None if not type: return None
......
...@@ -237,6 +237,8 @@ VER_DEP_MODULES = { ...@@ -237,6 +237,8 @@ VER_DEP_MODULES = {
'run.struct_conversion', 'run.struct_conversion',
'run.bytearray_coercion', 'run.bytearray_coercion',
'run.bytearraymethods', 'run.bytearraymethods',
'run.bytearray_ascii_auto_encoding',
'run.bytearray_default_auto_encoding',
# memory views require buffer protocol # memory views require buffer protocol
'memoryview.relaxed_strides', 'memoryview.relaxed_strides',
'memoryview.cythonarray', 'memoryview.cythonarray',
......
#cython: c_string_type = bytearray
#cython: c_string_encoding = ascii
"End of first directives"
include "unicode_ascii_auto_encoding.pyx"
auto_string_type = bytearray
def check_auto_string_type():
    """
    Assert that the module-level ``auto_string_type`` is ``bytearray``.

    >>> check_auto_string_type()
    """
    assert auto_string_type is bytearray
# cython: c_string_type = bytearray
# cython: c_string_encoding = default
import sys
if sys.version_info[0] >= 3:
__doc__ = r"""
>>> isinstance(as_objects("ab\xff"), bytearray)
True
>>> as_objects("ab\xff") == bytearray("ab\xff".encode())
True
>>> isinstance(slice_as_objects("ab\xff", 1, 4), bytearray)
True
>>> slice_as_objects("ab\xffd", 1, 4) == bytearray("b\xff".encode())
True
"""
include "bytearray_ascii_auto_encoding.pyx"
...@@ -6,10 +6,20 @@ auto_string_type = unicode ...@@ -6,10 +6,20 @@ auto_string_type = unicode
from libc.string cimport strcmp from libc.string cimport strcmp
def _as_string(x):
try:
return x.decode('latin1')
except AttributeError:
return x
def as_objects(char* ascii_data): def as_objects(char* ascii_data):
""" """
>>> print(as_objects('abc')) >>> x = as_objects('abc')
abc >>> isinstance(x, auto_string_type) or type(x)
True
>>> _as_string(x) == 'abc' or repr(x)
True
""" """
assert isinstance(<object>ascii_data, auto_string_type) assert isinstance(<object>ascii_data, auto_string_type)
assert isinstance(<bytes>ascii_data, bytes) assert isinstance(<bytes>ascii_data, bytes)
...@@ -30,8 +40,11 @@ def from_object(): ...@@ -30,8 +40,11 @@ def from_object():
def slice_as_objects(char* ascii_data, int start, int end): def slice_as_objects(char* ascii_data, int start, int end):
""" """
>>> print(slice_as_objects('grok', 1, 3)) >>> x = slice_as_objects('grok', 1, 3)
ro >>> isinstance(x, auto_string_type) or type(x)
True
>>> _as_string(x) == 'ro' or repr(x)
True
""" """
assert isinstance(<object>ascii_data[start:end], auto_string_type) assert isinstance(<object>ascii_data[start:end], auto_string_type)
assert isinstance(<bytes>ascii_data[start:end], bytes) assert isinstance(<bytes>ascii_data[start:end], bytes)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment