Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cython
Commits
45ca772e
Commit
45ca772e
authored
Nov 03, 2013
by
Stefan Behnel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimise bytearray.decode()
--HG-- rename : tests/run/bytesmethods.pyx => tests/run/bytearraymethods.pyx
parent
fe77570a
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
255 additions
and
45 deletions
+255
-45
Cython/Compiler/Optimize.py
Cython/Compiler/Optimize.py
+21
-4
Cython/Utility/StringTools.c
Cython/Utility/StringTools.c
+38
-40
runtests.py
runtests.py
+1
-1
tests/run/bytearraymethods.pyx
tests/run/bytearraymethods.pyx
+195
-0
No files found.
Cython/Compiler/Optimize.py
View file @
45ca772e
...
...
@@ -2788,11 +2788,11 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
string_node
=
string_node
.
arg
string_type
=
string_node
.
type
if
string_type
i
s
Builtin
.
bytes_type
:
if
string_type
i
n
(
Builtin
.
bytes_type
,
Builtin
.
bytearray_type
)
:
if
is_unbound_method
:
string_node
=
string_node
.
as_none_safe_node
(
"descriptor '%s' requires a '%s' object but received a 'NoneType'"
,
format_args
=
[
'decode'
,
'bytes'
])
format_args
=
[
'decode'
,
string_type
.
name
])
else
:
string_node
=
string_node
.
as_none_safe_node
(
"'NoneType' object has no attribute '%s'"
,
...
...
@@ -2862,12 +2862,15 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
helper_func_type
=
self
.
_decode_cpp_string_func_type
utility_code_name
=
'decode_cpp_string'
else
:
# Python bytes object
# Python bytes
/bytearray
object
if
not
stop
:
stop
=
ExprNodes
.
IntNode
(
node
.
pos
,
value
=
'PY_SSIZE_T_MAX'
,
constant_result
=
ExprNodes
.
not_a_constant
)
helper_func_type
=
self
.
_decode_bytes_func_type
utility_code_name
=
'decode_bytes'
if
string_type
is
Builtin
.
bytes_type
:
utility_code_name
=
'decode_bytes'
else
:
utility_code_name
=
'decode_bytearray'
node
=
ExprNodes
.
PythonCapiCallNode
(
node
.
pos
,
'__Pyx_%s'
%
utility_code_name
,
helper_func_type
,
...
...
@@ -2880,6 +2883,8 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
node
=
UtilNodes
.
EvalWithTempExprNode
(
temp
,
node
)
return
node
_handle_simple_method_bytearray_decode
=
_handle_simple_method_bytes_decode
def
_find_special_codec_name
(
self
,
encoding
):
try
:
requested_codec
=
codecs
.
getencoder
(
encoding
)
...
...
@@ -2957,6 +2962,18 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
node
,
function
,
args
,
is_unbound_method
,
'bytes'
,
'startswith'
,
bytes_tailmatch_utility_code
,
-
1
)
''' # disabled for now, enable when we consider it worth it (see StringTools.c)
def _handle_simple_method_bytearray_endswith(self, node, function, args, is_unbound_method):
return self._inject_tailmatch(
node, function, args, is_unbound_method, 'bytearray', 'endswith',
bytes_tailmatch_utility_code, +1)
def _handle_simple_method_bytearray_startswith(self, node, function, args, is_unbound_method):
return self._inject_tailmatch(
node, function, args, is_unbound_method, 'bytearray', 'startswith',
bytes_tailmatch_utility_code, -1)
'''
### helpers
def
_substitute_method_call
(
self
,
node
,
function
,
name
,
func_type
,
...
...
Cython/Utility/StringTools.c
View file @
45ca772e
...
...
@@ -279,41 +279,15 @@ static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Generic(PyObject* ustring,
}
/////////////// decode_cpp_string.proto ///////////////
//@requires IncludeCppStringH
static
CYTHON_INLINE
PyObject
*
__Pyx_decode_cpp_string
(
std
::
string
cppstring
,
Py_ssize_t
start
,
Py_ssize_t
stop
,
const
char
*
encoding
,
const
char
*
errors
,
PyObject
*
(
*
decode_func
)(
const
char
*
s
,
Py_ssize_t
size
,
const
char
*
errors
));
/////////////// decode_cpp_string ///////////////
//@requires: IncludeCppStringH
//@requires: decode_c_bytes
static
CYTHON_INLINE
PyObject
*
__Pyx_decode_cpp_string
(
std
::
string
cppstring
,
Py_ssize_t
start
,
Py_ssize_t
stop
,
const
char
*
encoding
,
const
char
*
errors
,
PyObject
*
(
*
decode_func
)(
const
char
*
s
,
Py_ssize_t
size
,
const
char
*
errors
))
{
const
char
*
cstring
=
cppstring
.
data
();
Py_ssize_t
length
=
cppstring
.
size
();
if
(
unlikely
(
start
<
0
))
{
start
+=
length
;
if
(
unlikely
(
start
<
0
))
start
=
0
;
}
if
(
unlikely
(
stop
<
0
))
stop
+=
length
;
else
if
(
stop
>=
length
)
stop
=
length
;
if
(
unlikely
(
start
>=
stop
))
return
PyUnicode_FromUnicode
(
NULL
,
0
);
cstring
+=
start
;
length
=
stop
-
start
;
if
(
decode_func
)
{
return
decode_func
(
cstring
,
length
,
errors
);
}
else
{
return
PyUnicode_Decode
(
cstring
,
length
,
encoding
,
errors
);
}
return
__Pyx_decode_c_bytes
(
cppstring
.
data
(),
cppstring
.
size
(),
start
,
stop
,
encoding
,
errors
,
decode_func
);
}
/////////////// decode_c_string.proto ///////////////
...
...
@@ -324,7 +298,9 @@ static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
PyObject
*
(
*
decode_func
)(
const
char
*
s
,
Py_ssize_t
size
,
const
char
*
errors
));
/////////////// decode_c_string ///////////////
//@requires IncludeStringH
//@requires: IncludeStringH
/* duplicate code to avoid calling strlen() if start >= 0 and stop >= 0 */
static
CYTHON_INLINE
PyObject
*
__Pyx_decode_c_string
(
const
char
*
cstring
,
Py_ssize_t
start
,
Py_ssize_t
stop
,
...
...
@@ -352,21 +328,19 @@ static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
}
}
/////////////// decode_bytes.proto ///////////////
/////////////// decode_
c_
bytes.proto ///////////////
static
CYTHON_INLINE
PyObject
*
__Pyx_decode_bytes
(
PyObject
*
string
,
Py_ssize_t
start
,
Py_ssize_t
stop
,
static
CYTHON_INLINE
PyObject
*
__Pyx_decode_
c_
bytes
(
const
char
*
cstring
,
Py_ssize_t
length
,
Py_ssize_t
start
,
Py_ssize_t
stop
,
const
char
*
encoding
,
const
char
*
errors
,
PyObject
*
(
*
decode_func
)(
const
char
*
s
,
Py_ssize_t
size
,
const
char
*
errors
));
/////////////// decode_bytes ///////////////
/////////////// decode_
c_
bytes ///////////////
static
CYTHON_INLINE
PyObject
*
__Pyx_decode_bytes
(
PyObject
*
string
,
Py_ssize_t
start
,
Py_ssize_t
stop
,
static
CYTHON_INLINE
PyObject
*
__Pyx_decode_
c_
bytes
(
const
char
*
cstring
,
Py_ssize_t
length
,
Py_ssize_t
start
,
Py_ssize_t
stop
,
const
char
*
encoding
,
const
char
*
errors
,
PyObject
*
(
*
decode_func
)(
const
char
*
s
,
Py_ssize_t
size
,
const
char
*
errors
))
{
char
*
cstring
;
Py_ssize_t
length
=
PyBytes_GET_SIZE
(
string
);
if
(
unlikely
((
start
<
0
)
|
(
stop
<
0
)))
{
if
(
start
<
0
)
{
start
+=
length
;
...
...
@@ -381,7 +355,7 @@ static CYTHON_INLINE PyObject* __Pyx_decode_bytes(
length
=
stop
-
start
;
if
(
unlikely
(
length
<=
0
))
return
PyUnicode_FromUnicode
(
NULL
,
0
);
cstring
=
PyBytes_AS_STRING
(
string
)
+
start
;
cstring
+=
start
;
if
(
decode_func
)
{
return
decode_func
(
cstring
,
length
,
errors
);
}
else
{
...
...
@@ -389,6 +363,30 @@ static CYTHON_INLINE PyObject* __Pyx_decode_bytes(
}
}
/////////////// decode_bytes.proto ///////////////
//@requires: decode_c_bytes
/* Decode a slice of a Python bytes object.
 *
 * Fetches the internal buffer pointer and the size of `string` with the
 * PyBytes_AS_STRING / PyBytes_GET_SIZE macros and hands them, together with
 * `start`, `stop`, `encoding`, `errors` and `decode_func` (all passed through
 * unchanged), to __Pyx_decode_c_bytes().  The result of that call is returned
 * as is.
 *
 * NOTE(review): the macros used here perform no type or NULL checking, so the
 * caller is presumably responsible for passing a real bytes object -- confirm
 * against the call sites generated by the compiler.
 */
static CYTHON_INLINE PyObject* __Pyx_decode_bytes(
         PyObject* string, Py_ssize_t start, Py_ssize_t stop,
         const char* encoding, const char* errors,
         PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
    const char* buffer = PyBytes_AS_STRING(string);
    Py_ssize_t buffer_len = PyBytes_GET_SIZE(string);

    return __Pyx_decode_c_bytes(
        buffer, buffer_len, start, stop, encoding, errors, decode_func);
}
/////////////// decode_bytearray.proto ///////////////
//@requires: decode_c_bytes
/* Decode a slice of a Python bytearray object.
 *
 * Fetches the internal buffer pointer and the size of `string` with the
 * PyByteArray_AS_STRING / PyByteArray_GET_SIZE macros and hands them,
 * together with `start`, `stop`, `encoding`, `errors` and `decode_func` (all
 * passed through unchanged), to __Pyx_decode_c_bytes().  The result of that
 * call is returned as is.
 *
 * NOTE(review): the macros used here perform no type or NULL checking, so the
 * caller is presumably responsible for passing a real bytearray object --
 * confirm against the call sites generated by the compiler.
 */
static CYTHON_INLINE PyObject* __Pyx_decode_bytearray(
         PyObject* string, Py_ssize_t start, Py_ssize_t stop,
         const char* encoding, const char* errors,
         PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
    const char* buffer = PyByteArray_AS_STRING(string);
    Py_ssize_t buffer_len = PyByteArray_GET_SIZE(string);

    return __Pyx_decode_c_bytes(
        buffer, buffer_len, start, stop, encoding, errors, decode_func);
}
/////////////// PyUnicode_Substring.proto ///////////////
static
CYTHON_INLINE
PyObject
*
__Pyx_PyUnicode_Substring
(
...
...
runtests.py
View file @
45ca772e
...
...
@@ -235,7 +235,7 @@ VER_DEP_MODULES = {
'run.pure_py', # decorators, with statement
'run.purecdef',
'run.struct_conversion',
'run.bytearray
_coercion
',
'run.bytearray',
# memory views require buffer protocol
'memoryview.relaxed_strides',
'memoryview.cythonarray',
...
...
tests/run/bytearraymethods.pyx
0 → 100644
View file @
45ca772e
cimport
cython
# One-byte bytearray values; the doctests in this module concatenate them
# (e.g. ``b_a+b_b+b_a``) to build their inputs.
b_a = bytearray(b'a')
b_b = bytearray(b'b')
''' # disabled for now, enable when we consider it worth the code overhead
@cython.test_assert_path_exists(
"//PythonCapiCallNode")
@cython.test_fail_if_path_exists(
"//SimpleCallNode")
def bytearray_startswith(bytearray s, sub, start=None, stop=None):
"""
>>> bytearray_startswith(b_a, b_a)
True
>>> bytearray_startswith(b_a+b_b, b_a)
True
>>> bytearray_startswith(b_a, b_b)
False
>>> bytearray_startswith(b_a+b_b, b_b)
False
>>> bytearray_startswith(b_a, (b_a, b_b))
True
>>> bytearray_startswith(b_a, b_a, 1)
False
>>> bytearray_startswith(b_a, b_a, 0, 0)
False
"""
if start is None:
return s.startswith(sub)
elif stop is None:
return s.startswith(sub, start)
else:
return s.startswith(sub, start, stop)
@cython.test_assert_path_exists(
"//PythonCapiCallNode")
@cython.test_fail_if_path_exists(
"//SimpleCallNode")
def bytearray_endswith(bytearray s, sub, start=None, stop=None):
"""
>>> bytearray_endswith(b_a, b_a)
True
>>> bytearray_endswith(b_b+b_a, b_a)
True
>>> bytearray_endswith(b_a, b_b)
False
>>> bytearray_endswith(b_b+b_a, b_b)
False
>>> bytearray_endswith(b_a, (b_a, b_b))
True
>>> bytearray_endswith(b_a, b_a, 1)
False
>>> bytearray_endswith(b_a, b_a, 0, 0)
False
"""
if start is None:
return s.endswith(sub)
elif stop is None:
return s.endswith(sub, start)
else:
return s.endswith(sub, start, stop)
'''
# Test for the bound-method form ``s.decode('utf8')`` on a typed bytearray,
# with and without slicing.  The two decorators check the compiled syntax
# tree: a PythonCapiCallNode must be present and no SimpleCallNode may remain
# (presumably meaning that the decode() call was replaced by a C helper call
# rather than left as a generic Python method call -- confirm against the
# test decorator implementation).
@cython.test_assert_path_exists(
    "//PythonCapiCallNode")
@cython.test_fail_if_path_exists(
    "//SimpleCallNode")
def bytearray_decode(bytearray s, start=None, stop=None):
    """
    >>> s = b_a+b_b+b_a+b_a+b_b

    >>> print(bytearray_decode(s))
    abaab

    >>> print(bytearray_decode(s, 2))
    aab
    >>> print(bytearray_decode(s, -3))
    aab

    >>> print(bytearray_decode(s, None, 4))
    abaa
    >>> print(bytearray_decode(s, None, 400))
    abaab
    >>> print(bytearray_decode(s, None, -2))
    aba
    >>> print(bytearray_decode(s, None, -4))
    a
    >>> print(bytearray_decode(s, None, -5))
    <BLANKLINE>
    >>> print(bytearray_decode(s, None, -200))
    <BLANKLINE>

    >>> print(bytearray_decode(s, 2, 5))
    aab
    >>> print(bytearray_decode(s, 2, 500))
    aab
    >>> print(bytearray_decode(s, 2, -1))
    aa
    >>> print(bytearray_decode(s, 2, -3))
    <BLANKLINE>
    >>> print(bytearray_decode(s, 2, -300))
    <BLANKLINE>
    >>> print(bytearray_decode(s, -3, -1))
    aa
    >>> print(bytearray_decode(s, -300, 300))
    abaab
    >>> print(bytearray_decode(s, -300, -4))
    a
    >>> print(bytearray_decode(s, -300, -5))
    <BLANKLINE>
    >>> print(bytearray_decode(s, -300, -6))
    <BLANKLINE>
    >>> print(bytearray_decode(s, -300, -500))
    <BLANKLINE>

    >>> s[:'test']                       # doctest: +ELLIPSIS
    Traceback (most recent call last):
    TypeError:...

    >>> print(bytearray_decode(s, 'test'))   # doctest: +ELLIPSIS
    Traceback (most recent call last):
    TypeError:...

    >>> print(bytearray_decode(s, None, 'test'))    # doctest: +ELLIPSIS
    Traceback (most recent call last):
    TypeError:...

    >>> print(bytearray_decode(s, 'test', 'test'))  # doctest: +ELLIPSIS
    Traceback (most recent call last):
    TypeError:...

    >>> print(bytearray_decode(None))
    Traceback (most recent call last):
    AttributeError: 'NoneType' object has no attribute 'decode'
    >>> print(bytearray_decode(None, 1))
    Traceback (most recent call last):
    TypeError: 'NoneType' object is not subscriptable
    >>> print(bytearray_decode(None, None, 1))
    Traceback (most recent call last):
    TypeError: 'NoneType' object is not subscriptable
    >>> print(bytearray_decode(None, 0, 1))
    Traceback (most recent call last):
    TypeError: 'NoneType' object is not subscriptable
    """
    # Each branch spells out a distinct source form (no slice, stop-only,
    # start-only, both bounds) so that every form is compiled and checked
    # separately; do not merge them into one generic slice expression.
    if start is None:
        if stop is None:
            # Plain decode of the whole bytearray.
            return s.decode('utf8')
        else:
            # Only an upper bound was given.
            return s[:stop].decode('utf8')
    elif stop is None:
        # Only a lower bound was given.
        return s[start:].decode('utf8')
    else:
        # Both bounds were given.
        return s[start:stop].decode('utf8')
# Test for the unbound-method form ``bytearray.decode(s, 'utf8')``, with and
# without slicing the argument.  The two decorators check the compiled syntax
# tree: a PythonCapiCallNode must be present and no SimpleCallNode may remain
# (presumably meaning that the decode() call was replaced by a C helper call
# rather than left as a generic Python call -- confirm against the test
# decorator implementation).
@cython.test_assert_path_exists(
    "//PythonCapiCallNode")
@cython.test_fail_if_path_exists(
    "//SimpleCallNode")
def bytearray_decode_unbound_method(bytearray s, start=None, stop=None):
    """
    >>> s = b_a+b_b+b_a+b_a+b_b

    >>> print(bytearray_decode_unbound_method(s))
    abaab
    >>> print(bytearray_decode_unbound_method(s, 1))
    baab
    >>> print(bytearray_decode_unbound_method(s, None, 3))
    aba
    >>> print(bytearray_decode_unbound_method(s, 1, 4))
    baa

    >>> print(bytearray_decode_unbound_method(None))
    Traceback (most recent call last):
    TypeError: descriptor 'decode' requires a 'bytearray' object but received a 'NoneType'
    >>> print(bytearray_decode_unbound_method(None, 1))
    Traceback (most recent call last):
    TypeError: 'NoneType' object is not subscriptable
    >>> print(bytearray_decode_unbound_method(None, None, 1))
    Traceback (most recent call last):
    TypeError: 'NoneType' object is not subscriptable
    >>> print(bytearray_decode_unbound_method(None, 0, 1))
    Traceback (most recent call last):
    TypeError: 'NoneType' object is not subscriptable
    """
    # Each branch spells out a distinct source form (no slice, stop-only,
    # start-only, both bounds) so that every form is compiled and checked
    # separately; do not merge them into one generic slice expression.
    if start is None:
        if stop is None:
            # Unbound call on the whole bytearray.
            return bytearray.decode(s, 'utf8')
        else:
            # Only an upper bound was given.
            return bytearray.decode(s[:stop], 'utf8')
    elif stop is None:
        # Only a lower bound was given.
        return bytearray.decode(s[start:], 'utf8')
    else:
        # Both bounds were given.
        return bytearray.decode(s[start:stop], 'utf8')
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment