Commit 45ca772e authored by Stefan Behnel

optimise bytearray.decode()

--HG--
rename : tests/run/bytesmethods.pyx => tests/run/bytearraymethods.pyx
parent fe77570a
...@@ -2788,11 +2788,11 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform): ...@@ -2788,11 +2788,11 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
string_node = string_node.arg string_node = string_node.arg
string_type = string_node.type string_type = string_node.type
if string_type is Builtin.bytes_type: if string_type in (Builtin.bytes_type, Builtin.bytearray_type):
if is_unbound_method: if is_unbound_method:
string_node = string_node.as_none_safe_node( string_node = string_node.as_none_safe_node(
"descriptor '%s' requires a '%s' object but received a 'NoneType'", "descriptor '%s' requires a '%s' object but received a 'NoneType'",
format_args = ['decode', 'bytes']) format_args = ['decode', string_type.name])
else: else:
string_node = string_node.as_none_safe_node( string_node = string_node.as_none_safe_node(
"'NoneType' object has no attribute '%s'", "'NoneType' object has no attribute '%s'",
...@@ -2862,12 +2862,15 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform): ...@@ -2862,12 +2862,15 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
helper_func_type = self._decode_cpp_string_func_type helper_func_type = self._decode_cpp_string_func_type
utility_code_name = 'decode_cpp_string' utility_code_name = 'decode_cpp_string'
else: else:
# Python bytes object # Python bytes/bytearray object
if not stop: if not stop:
stop = ExprNodes.IntNode(node.pos, value='PY_SSIZE_T_MAX', stop = ExprNodes.IntNode(node.pos, value='PY_SSIZE_T_MAX',
constant_result=ExprNodes.not_a_constant) constant_result=ExprNodes.not_a_constant)
helper_func_type = self._decode_bytes_func_type helper_func_type = self._decode_bytes_func_type
utility_code_name = 'decode_bytes' if string_type is Builtin.bytes_type:
utility_code_name = 'decode_bytes'
else:
utility_code_name = 'decode_bytearray'
node = ExprNodes.PythonCapiCallNode( node = ExprNodes.PythonCapiCallNode(
node.pos, '__Pyx_%s' % utility_code_name, helper_func_type, node.pos, '__Pyx_%s' % utility_code_name, helper_func_type,
...@@ -2880,6 +2883,8 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform): ...@@ -2880,6 +2883,8 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
node = UtilNodes.EvalWithTempExprNode(temp, node) node = UtilNodes.EvalWithTempExprNode(temp, node)
return node return node
_handle_simple_method_bytearray_decode = _handle_simple_method_bytes_decode
def _find_special_codec_name(self, encoding): def _find_special_codec_name(self, encoding):
try: try:
requested_codec = codecs.getencoder(encoding) requested_codec = codecs.getencoder(encoding)
...@@ -2957,6 +2962,18 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform): ...@@ -2957,6 +2962,18 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
node, function, args, is_unbound_method, 'bytes', 'startswith', node, function, args, is_unbound_method, 'bytes', 'startswith',
bytes_tailmatch_utility_code, -1) bytes_tailmatch_utility_code, -1)
# NOTE: the two bytearray handlers below are NOT defined -- they sit inside a
# bare triple-quoted string literal, which is how they are kept disabled.
# When enabled, each would delegate to self._inject_tailmatch() with the type
# name 'bytearray', the method name, the shared bytes_tailmatch_utility_code,
# and a direction argument of +1 for endswith / -1 for startswith.
''' # disabled for now, enable when we consider it worth it (see StringTools.c)
def _handle_simple_method_bytearray_endswith(self, node, function, args, is_unbound_method):
    return self._inject_tailmatch(
        node, function, args, is_unbound_method, 'bytearray', 'endswith',
        bytes_tailmatch_utility_code, +1)
def _handle_simple_method_bytearray_startswith(self, node, function, args, is_unbound_method):
    return self._inject_tailmatch(
        node, function, args, is_unbound_method, 'bytearray', 'startswith',
        bytes_tailmatch_utility_code, -1)
'''
### helpers ### helpers
def _substitute_method_call(self, node, function, name, func_type, def _substitute_method_call(self, node, function, name, func_type,
......
...@@ -279,41 +279,15 @@ static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Generic(PyObject* ustring, ...@@ -279,41 +279,15 @@ static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Generic(PyObject* ustring,
} }
/////////////// decode_cpp_string.proto /////////////// /////////////// decode_cpp_string.proto ///////////////
//@requires IncludeCppStringH //@requires: IncludeCppStringH
//@requires: decode_c_bytes
static CYTHON_INLINE PyObject* __Pyx_decode_cpp_string(
std::string cppstring, Py_ssize_t start, Py_ssize_t stop,
const char* encoding, const char* errors,
PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors));
/////////////// decode_cpp_string ///////////////
static CYTHON_INLINE PyObject* __Pyx_decode_cpp_string( static CYTHON_INLINE PyObject* __Pyx_decode_cpp_string(
std::string cppstring, Py_ssize_t start, Py_ssize_t stop, std::string cppstring, Py_ssize_t start, Py_ssize_t stop,
const char* encoding, const char* errors, const char* encoding, const char* errors,
PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) { PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
const char* cstring = cppstring.data(); return __Pyx_decode_c_bytes(
Py_ssize_t length = cppstring.size(); cppstring.data(), cppstring.size(), start, stop, encoding, errors, decode_func);
if (unlikely(start < 0)) {
start += length;
if (unlikely(start < 0))
start = 0;
}
if (unlikely(stop < 0))
stop += length;
else if (stop >= length)
stop = length;
if (unlikely(start >= stop))
return PyUnicode_FromUnicode(NULL, 0);
cstring += start;
length = stop - start;
if (decode_func) {
return decode_func(cstring, length, errors);
} else {
return PyUnicode_Decode(cstring, length, encoding, errors);
}
} }
/////////////// decode_c_string.proto /////////////// /////////////// decode_c_string.proto ///////////////
...@@ -324,7 +298,9 @@ static CYTHON_INLINE PyObject* __Pyx_decode_c_string( ...@@ -324,7 +298,9 @@ static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)); PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors));
/////////////// decode_c_string /////////////// /////////////// decode_c_string ///////////////
//@requires IncludeStringH //@requires: IncludeStringH
/* duplicate code to avoid calling strlen() if start >= 0 and stop >= 0 */
static CYTHON_INLINE PyObject* __Pyx_decode_c_string( static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
const char* cstring, Py_ssize_t start, Py_ssize_t stop, const char* cstring, Py_ssize_t start, Py_ssize_t stop,
...@@ -352,21 +328,19 @@ static CYTHON_INLINE PyObject* __Pyx_decode_c_string( ...@@ -352,21 +328,19 @@ static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
} }
} }
/////////////// decode_bytes.proto /////////////// /////////////// decode_c_bytes.proto ///////////////
static CYTHON_INLINE PyObject* __Pyx_decode_bytes( static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes(
PyObject* string, Py_ssize_t start, Py_ssize_t stop, const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop,
const char* encoding, const char* errors, const char* encoding, const char* errors,
PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)); PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors));
/////////////// decode_bytes /////////////// /////////////// decode_c_bytes ///////////////
static CYTHON_INLINE PyObject* __Pyx_decode_bytes( static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes(
PyObject* string, Py_ssize_t start, Py_ssize_t stop, const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop,
const char* encoding, const char* errors, const char* encoding, const char* errors,
PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) { PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
char* cstring;
Py_ssize_t length = PyBytes_GET_SIZE(string);
if (unlikely((start < 0) | (stop < 0))) { if (unlikely((start < 0) | (stop < 0))) {
if (start < 0) { if (start < 0) {
start += length; start += length;
...@@ -381,7 +355,7 @@ static CYTHON_INLINE PyObject* __Pyx_decode_bytes( ...@@ -381,7 +355,7 @@ static CYTHON_INLINE PyObject* __Pyx_decode_bytes(
length = stop - start; length = stop - start;
if (unlikely(length <= 0)) if (unlikely(length <= 0))
return PyUnicode_FromUnicode(NULL, 0); return PyUnicode_FromUnicode(NULL, 0);
cstring = PyBytes_AS_STRING(string) + start; cstring += start;
if (decode_func) { if (decode_func) {
return decode_func(cstring, length, errors); return decode_func(cstring, length, errors);
} else { } else {
...@@ -389,6 +363,30 @@ static CYTHON_INLINE PyObject* __Pyx_decode_bytes( ...@@ -389,6 +363,30 @@ static CYTHON_INLINE PyObject* __Pyx_decode_bytes(
} }
} }
/////////////// decode_bytes.proto ///////////////
//@requires: decode_c_bytes

/*
 * Decode the [start:stop] slice of a Python bytes object into a unicode object.
 *
 * Thin wrapper: it only extracts the character buffer and the size from
 * `string` and forwards everything else unchanged to __Pyx_decode_c_bytes(),
 * which performs the index adjustment and the actual decoding.
 *
 *   string       the bytes object to decode. PyBytes_AS_STRING() and
 *                PyBytes_GET_SIZE() are the unchecked macro forms, so no type
 *                or NULL check happens here; the caller must pass a real
 *                bytes object.
 *   start, stop  slice bounds, passed through as given.
 *   encoding     codec name, passed through.
 *   errors       error handler name, passed through.
 *   decode_func  optional codec-specific decoder, passed through.
 *
 * Returns whatever __Pyx_decode_c_bytes() returns.
 */
static CYTHON_INLINE PyObject* __Pyx_decode_bytes(
         PyObject* string, Py_ssize_t start, Py_ssize_t stop,
         const char* encoding, const char* errors,
         PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
    return __Pyx_decode_c_bytes(
        PyBytes_AS_STRING(string), PyBytes_GET_SIZE(string),
        start, stop, encoding, errors, decode_func);
}
/////////////// decode_bytearray.proto ///////////////
//@requires: decode_c_bytes

/*
 * Decode the [start:stop] slice of a Python bytearray into a unicode object.
 *
 * Thin wrapper: it only extracts the character buffer and the size from
 * `string` and forwards everything else unchanged to __Pyx_decode_c_bytes(),
 * which performs the index adjustment and the actual decoding.
 *
 *   string       the bytearray object to decode. PyByteArray_AS_STRING() and
 *                PyByteArray_GET_SIZE() are the unchecked macro forms, so no
 *                type or NULL check happens here; the caller must pass a real
 *                bytearray object.
 *   start, stop  slice bounds, passed through as given.
 *   encoding     codec name, passed through.
 *   errors       error handler name, passed through.
 *   decode_func  optional codec-specific decoder, passed through.
 *
 * Returns whatever __Pyx_decode_c_bytes() returns.
 */
static CYTHON_INLINE PyObject* __Pyx_decode_bytearray(
         PyObject* string, Py_ssize_t start, Py_ssize_t stop,
         const char* encoding, const char* errors,
         PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
    return __Pyx_decode_c_bytes(
        PyByteArray_AS_STRING(string), PyByteArray_GET_SIZE(string),
        start, stop, encoding, errors, decode_func);
}
/////////////// PyUnicode_Substring.proto /////////////// /////////////// PyUnicode_Substring.proto ///////////////
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring( static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
......
...@@ -235,7 +235,7 @@ VER_DEP_MODULES = { ...@@ -235,7 +235,7 @@ VER_DEP_MODULES = {
'run.pure_py', # decorators, with statement 'run.pure_py', # decorators, with statement
'run.purecdef', 'run.purecdef',
'run.struct_conversion', 'run.struct_conversion',
'run.bytearray_coercion', 'run.bytearray',
# memory views require buffer protocol # memory views require buffer protocol
'memoryview.relaxed_strides', 'memoryview.relaxed_strides',
'memoryview.cythonarray', 'memoryview.cythonarray',
......
cimport cython
# Single-byte bytearray fixtures; the doctests build their inputs by
# concatenating these two objects with `+`.
b_a, b_b = bytearray(b'a'), bytearray(b'b')
# NOTE: everything between the triple quotes below is a bare string literal,
# not live code, so neither function is defined and none of their doctests
# are collected. They exercise bytearray.startswith() / .endswith() with one,
# two and three positional arguments.
# NOTE(review): presumably these stay disabled until the matching bytearray
# tailmatch optimisation is enabled in the compiler -- confirm before enabling.
''' # disabled for now, enable when we consider it worth the code overhead
@cython.test_assert_path_exists(
    "//PythonCapiCallNode")
@cython.test_fail_if_path_exists(
    "//SimpleCallNode")
def bytearray_startswith(bytearray s, sub, start=None, stop=None):
    """
    >>> bytearray_startswith(b_a, b_a)
    True
    >>> bytearray_startswith(b_a+b_b, b_a)
    True
    >>> bytearray_startswith(b_a, b_b)
    False
    >>> bytearray_startswith(b_a+b_b, b_b)
    False
    >>> bytearray_startswith(b_a, (b_a, b_b))
    True
    >>> bytearray_startswith(b_a, b_a, 1)
    False
    >>> bytearray_startswith(b_a, b_a, 0, 0)
    False
    """
    if start is None:
        return s.startswith(sub)
    elif stop is None:
        return s.startswith(sub, start)
    else:
        return s.startswith(sub, start, stop)
@cython.test_assert_path_exists(
    "//PythonCapiCallNode")
@cython.test_fail_if_path_exists(
    "//SimpleCallNode")
def bytearray_endswith(bytearray s, sub, start=None, stop=None):
    """
    >>> bytearray_endswith(b_a, b_a)
    True
    >>> bytearray_endswith(b_b+b_a, b_a)
    True
    >>> bytearray_endswith(b_a, b_b)
    False
    >>> bytearray_endswith(b_b+b_a, b_b)
    False
    >>> bytearray_endswith(b_a, (b_a, b_b))
    True
    >>> bytearray_endswith(b_a, b_a, 1)
    False
    >>> bytearray_endswith(b_a, b_a, 0, 0)
    False
    """
    if start is None:
        return s.endswith(sub)
    elif stop is None:
        return s.endswith(sub, start)
    else:
        return s.endswith(sub, start, stop)
'''
# Tree assertions for the optimisation under test: the compiled function must
# contain a PythonCapiCallNode and must not contain any SimpleCallNode.
# NOTE(review): presumably a leftover SimpleCallNode would mean .decode() was
# compiled as a generic Python call instead of the C helper -- confirm.
@cython.test_assert_path_exists(
    "//PythonCapiCallNode")
@cython.test_fail_if_path_exists(
    "//SimpleCallNode")
def bytearray_decode(bytearray s, start=None, stop=None):
    """
    >>> s = b_a+b_b+b_a+b_a+b_b
    >>> print(bytearray_decode(s))
    abaab
    >>> print(bytearray_decode(s, 2))
    aab
    >>> print(bytearray_decode(s, -3))
    aab
    >>> print(bytearray_decode(s, None, 4))
    abaa
    >>> print(bytearray_decode(s, None, 400))
    abaab
    >>> print(bytearray_decode(s, None, -2))
    aba
    >>> print(bytearray_decode(s, None, -4))
    a
    >>> print(bytearray_decode(s, None, -5))
    <BLANKLINE>
    >>> print(bytearray_decode(s, None, -200))
    <BLANKLINE>
    >>> print(bytearray_decode(s, 2, 5))
    aab
    >>> print(bytearray_decode(s, 2, 500))
    aab
    >>> print(bytearray_decode(s, 2, -1))
    aa
    >>> print(bytearray_decode(s, 2, -3))
    <BLANKLINE>
    >>> print(bytearray_decode(s, 2, -300))
    <BLANKLINE>
    >>> print(bytearray_decode(s, -3, -1))
    aa
    >>> print(bytearray_decode(s, -300, 300))
    abaab
    >>> print(bytearray_decode(s, -300, -4))
    a
    >>> print(bytearray_decode(s, -300, -5))
    <BLANKLINE>
    >>> print(bytearray_decode(s, -300, -6))
    <BLANKLINE>
    >>> print(bytearray_decode(s, -300, -500))
    <BLANKLINE>
    >>> s[:'test'] # doctest: +ELLIPSIS
    Traceback (most recent call last):
    TypeError:...
    >>> print(bytearray_decode(s, 'test')) # doctest: +ELLIPSIS
    Traceback (most recent call last):
    TypeError:...
    >>> print(bytearray_decode(s, None, 'test')) # doctest: +ELLIPSIS
    Traceback (most recent call last):
    TypeError:...
    >>> print(bytearray_decode(s, 'test', 'test')) # doctest: +ELLIPSIS
    Traceback (most recent call last):
    TypeError:...
    >>> print(bytearray_decode(None))
    Traceback (most recent call last):
    AttributeError: 'NoneType' object has no attribute 'decode'
    >>> print(bytearray_decode(None, 1))
    Traceback (most recent call last):
    TypeError: 'NoneType' object is not subscriptable
    >>> print(bytearray_decode(None, None, 1))
    Traceback (most recent call last):
    TypeError: 'NoneType' object is not subscriptable
    >>> print(bytearray_decode(None, 0, 1))
    Traceback (most recent call last):
    TypeError: 'NoneType' object is not subscriptable
    """
    # Each branch writes a different slice form literally (no slice, [:stop],
    # [start:], [start:stop]) so that all four are compiled and exercised.
    # Do not collapse them into a single s[start:stop] expression.
    if start is None:
        if stop is None:
            return s.decode('utf8')
        else:
            return s[:stop].decode('utf8')
    elif stop is None:
        return s[start:].decode('utf8')
    else:
        return s[start:stop].decode('utf8')
# Same tree assertions as for the bound-method test: a PythonCapiCallNode must
# be present and no SimpleCallNode may remain, this time for calls written in
# the unbound form bytearray.decode(obj, ...).
@cython.test_assert_path_exists(
    "//PythonCapiCallNode")
@cython.test_fail_if_path_exists(
    "//SimpleCallNode")
def bytearray_decode_unbound_method(bytearray s, start=None, stop=None):
    """
    >>> s = b_a+b_b+b_a+b_a+b_b
    >>> print(bytearray_decode_unbound_method(s))
    abaab
    >>> print(bytearray_decode_unbound_method(s, 1))
    baab
    >>> print(bytearray_decode_unbound_method(s, None, 3))
    aba
    >>> print(bytearray_decode_unbound_method(s, 1, 4))
    baa
    >>> print(bytearray_decode_unbound_method(None))
    Traceback (most recent call last):
    TypeError: descriptor 'decode' requires a 'bytearray' object but received a 'NoneType'
    >>> print(bytearray_decode_unbound_method(None, 1))
    Traceback (most recent call last):
    TypeError: 'NoneType' object is not subscriptable
    >>> print(bytearray_decode_unbound_method(None, None, 1))
    Traceback (most recent call last):
    TypeError: 'NoneType' object is not subscriptable
    >>> print(bytearray_decode_unbound_method(None, 0, 1))
    Traceback (most recent call last):
    TypeError: 'NoneType' object is not subscriptable
    """
    # One branch per slice form (no slice, [:stop], [start:], [start:stop]),
    # each passed as the first argument of the unbound bytearray.decode().
    # With s=None the un-sliced call raises the "descriptor ... requires"
    # TypeError, while the sliced calls fail earlier on the subscript.
    if start is None:
        if stop is None:
            return bytearray.decode(s, 'utf8')
        else:
            return bytearray.decode(s[:stop], 'utf8')
    elif stop is None:
        return bytearray.decode(s[start:], 'utf8')
    else:
        return bytearray.decode(s[start:stop], 'utf8')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment