Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
nexedi
cython
Commits
f54625ae
Commit
f54625ae
authored
Apr 25, 2010
by
Stefan Behnel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimised char/Py_UNICODE indexing of bytes/unicode objects
parent
617f3322
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
258 additions
and
7 deletions
+258
-7
Cython/Compiler/Optimize.py
Cython/Compiler/Optimize.py
+106
-0
tests/run/bytes_indexing.pyx
tests/run/bytes_indexing.pyx
+97
-0
tests/run/py_unicode_type.pyx
tests/run/py_unicode_type.pyx
+55
-7
No files found.
Cython/Compiler/Optimize.py
View file @
f54625ae
...
@@ -1187,8 +1187,72 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
...
@@ -1187,8 +1187,72 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
if
isinstance
(
arg
,
ExprNodes
.
SimpleCallNode
):
if
isinstance
(
arg
,
ExprNodes
.
SimpleCallNode
):
if
node
.
type
.
is_int
or
node
.
type
.
is_float
:
if
node
.
type
.
is_int
or
node
.
type
.
is_float
:
return
self
.
_optimise_numeric_cast_call
(
node
,
arg
)
return
self
.
_optimise_numeric_cast_call
(
node
,
arg
)
elif
isinstance
(
arg
,
ExprNodes
.
IndexNode
)
and
not
arg
.
is_buffer_access
:
index_node
=
arg
.
index
if
isinstance
(
index_node
,
ExprNodes
.
CoerceToPyTypeNode
):
index_node
=
index_node
.
arg
if
index_node
.
type
.
is_int
:
return
self
.
_optimise_int_indexing
(
node
,
arg
,
index_node
)
return
node
return
node
# C-level signature used for the "__Pyx_PyUnicode_GetItemInt" helper call:
# (unicode object, Py_ssize_t index, int check_bounds) -> Py_UNICODE.
# The declared error return value is ``(Py_UNICODE)-1`` and exception_check
# is enabled.
# NOTE(review): exception_check is presumably needed because that value can
# also be a legitimate character - confirm against CFuncType semantics.
PyUnicode_GetItemInt_func_type = PyrexTypes.CFuncType(
    PyrexTypes.c_py_unicode_type, [
        PyrexTypes.CFuncTypeArg("unicode", Builtin.unicode_type, None),
        PyrexTypes.CFuncTypeArg("index", PyrexTypes.c_py_ssize_t_type, None),
        PyrexTypes.CFuncTypeArg("check_bounds", PyrexTypes.c_int_type, None),
        ],
    exception_value = "((Py_UNICODE)-1)",
    exception_check = True)
# C-level signature used for the "__Pyx_PyBytes_GetItemInt" helper call:
# (bytes object, Py_ssize_t index, int check_bounds) -> char.
# The declared error return value is ``(char)-1`` and exception_check is
# enabled.
# NOTE(review): exception_check is presumably needed because that value can
# also be a legitimate byte - confirm against CFuncType semantics.
PyBytes_GetItemInt_func_type = PyrexTypes.CFuncType(
    PyrexTypes.c_char_type, [
        PyrexTypes.CFuncTypeArg("bytes", Builtin.bytes_type, None),
        PyrexTypes.CFuncTypeArg("index", PyrexTypes.c_py_ssize_t_type, None),
        PyrexTypes.CFuncTypeArg("check_bounds", PyrexTypes.c_int_type, None),
        ],
    exception_value = "((char)-1)",
    exception_check = True)
def _optimise_int_indexing(self, coerce_node, arg, index_node):
    """Turn a C-typed coercion of ``base[int_index]`` into a C helper call.

    coerce_node -- the node coercing the indexing result to a C type; it is
                   returned unchanged when no optimisation applies
    arg         -- the index expression node; its ``base`` is the indexed
                   object
    index_node  -- the integer index expression

    Two cases are handled:
      * unicode base coerced to Py_UNICODE -> __Pyx_PyUnicode_GetItemInt()
      * bytes base coerced to char / unsigned char
                                            -> __Pyx_PyBytes_GetItemInt()
    The helper receives 1 or 0 as its last argument depending on the
    'boundscheck' directive of the current environment.
    """
    env = self.current_env()
    if env.directives['boundscheck']:
        check_flag = 1
    else:
        check_flag = 0

    base_type = arg.base.type
    target_type = coerce_node.type
    pos = coerce_node.pos

    if base_type is Builtin.unicode_type:
        if target_type is not PyrexTypes.c_py_unicode_type:
            return coerce_node
        # unicode[index] -> Py_UNICODE
        check_node = ExprNodes.IntNode(
            pos, value=str(check_flag), constant_result=check_flag)
        call_args = [
            arg.base.as_none_safe_node(env),
            index_node.coerce_to(PyrexTypes.c_py_ssize_t_type, env),
            check_node,
            ]
        return ExprNodes.PythonCapiCallNode(
            pos, "__Pyx_PyUnicode_GetItemInt",
            self.PyUnicode_GetItemInt_func_type,
            args=call_args,
            is_temp=True,
            utility_code=unicode_index_utility_code)

    if base_type is Builtin.bytes_type:
        if target_type not in (PyrexTypes.c_char_type,
                               PyrexTypes.c_uchar_type):
            return coerce_node
        # bytes[index] -> char
        check_node = ExprNodes.IntNode(
            pos, value=str(check_flag), constant_result=check_flag)
        call_args = [
            arg.base.as_none_safe_node(env),
            index_node.coerce_to(PyrexTypes.c_py_ssize_t_type, env),
            check_node,
            ]
        result = ExprNodes.PythonCapiCallNode(
            pos, "__Pyx_PyBytes_GetItemInt",
            self.PyBytes_GetItemInt_func_type,
            args=call_args,
            is_temp=True,
            utility_code=bytes_index_utility_code)
        if target_type is PyrexTypes.c_char_type:
            return result
        # the helper yields plain char; convert for the unsigned target
        return result.coerce_to(target_type, env)

    return coerce_node
def
_optimise_numeric_cast_call
(
self
,
node
,
arg
):
def
_optimise_numeric_cast_call
(
self
,
node
,
arg
):
function
=
arg
.
function
function
=
arg
.
function
if
not
isinstance
(
function
,
ExprNodes
.
NameNode
)
\
if
not
isinstance
(
function
,
ExprNodes
.
NameNode
)
\
...
@@ -2348,6 +2412,48 @@ bad:
...
@@ -2348,6 +2412,48 @@ bad:
)
)
# C helper __Pyx_PyUnicode_GetItemInt(): returns the Py_UNICODE at `index`
# of a unicode object.  When `check_bounds` is non-zero, an index outside
# [-size, size) sets IndexError and returns (Py_UNICODE)-1.  Negative
# indices are shifted by the object's size before the lookup.
unicode_index_utility_code = UtilityCode(
    proto = """
static CYTHON_INLINE Py_UNICODE __Pyx_PyUnicode_GetItemInt(PyObject* unicode, Py_ssize_t index, int check_bounds); /* proto */
""",
    impl = """
static CYTHON_INLINE Py_UNICODE __Pyx_PyUnicode_GetItemInt(PyObject* unicode, Py_ssize_t index, int check_bounds) {
if (check_bounds) {
if (unlikely(index >= PyUnicode_GET_SIZE(unicode)) |
unlikely(index < -PyUnicode_GET_SIZE(unicode))) {
PyErr_Format(PyExc_IndexError, "string index out of range");
return (Py_UNICODE)-1;
}
}
if (index < 0)
index += PyUnicode_GET_SIZE(unicode);
return PyUnicode_AS_UNICODE(unicode)[index];
}
"""
)
# C helper __Pyx_PyBytes_GetItemInt(): returns the char at `index` of a
# bytes object.  When `check_bounds` is non-zero, an index outside
# [-size, size) sets IndexError and returns -1.  Negative indices are
# shifted by the object's size before the lookup.
# NOTE(review): the prototype names its first parameter `unicode` while the
# definition names it `bytes`; this looks like a copy-paste leftover from the
# unicode helper and is harmless to the C compiler - confirm and align.
bytes_index_utility_code = UtilityCode(
    proto = """
static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* unicode, Py_ssize_t index, int check_bounds); /* proto */
""",
    impl = """
static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t index, int check_bounds) {
if (check_bounds) {
if (unlikely(index >= PyBytes_GET_SIZE(bytes)) |
unlikely(index < -PyBytes_GET_SIZE(bytes))) {
PyErr_Format(PyExc_IndexError, "string index out of range");
return -1;
}
}
if (index < 0)
index += PyBytes_GET_SIZE(bytes);
return PyBytes_AS_STRING(bytes)[index];
}
"""
)
include_string_h_utility_code
=
UtilityCode
(
include_string_h_utility_code
=
UtilityCode
(
proto
=
"""
proto
=
"""
#include <string.h>
#include <string.h>
...
...
tests/run/bytes_indexing.pyx
0 → 100644
View file @
f54625ae
cimport
cython
# Module-level bytes object with the same value as the literal indexed by the
# test functions below (none of the functions visible here reference it).
cdef bytes b12345 = b'12345'
# Indexes a bytes literal with a C int and returns the result without any
# C-level cast; the doctests compare it with a one-character ASCII-encoded
# string.
def index_literal(int i):
    """
    >>> index_literal(0) == '1'.encode('ASCII')
    True
    >>> index_literal(-5) == '1'.encode('ASCII')
    True
    >>> index_literal(2) == '3'.encode('ASCII')
    True
    >>> index_literal(4) == '5'.encode('ASCII')
    True
    """
    return b"12345"[i]
# Explicit <char> cast of an indexed bytes literal.  The tree assertions
# require a PythonCapiCallNode and forbid IndexNode / CoerceFromPyTypeNode in
# the compiled function; the doctests also check the out-of-range IndexError.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//IndexNode",
                                 "//CoerceFromPyTypeNode")
def index_literal_char_cast(int i):
    """
    >>> index_literal_char_cast(0) == ord('1')
    True
    >>> index_literal_char_cast(-5) == ord('1')
    True
    >>> index_literal_char_cast(2) == ord('3')
    True
    >>> index_literal_char_cast(4) == ord('5')
    True
    >>> index_literal_char_cast(6)
    Traceback (most recent call last):
    IndexError: string index out of range
    """
    return <char>(b"12345"[i])
# Explicit <unsigned char> cast of an indexed bytes literal.  The tree
# assertions require a PythonCapiCallNode and forbid IndexNode /
# CoerceFromPyTypeNode; the doctests also check the out-of-range IndexError.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//IndexNode",
                                 "//CoerceFromPyTypeNode")
def index_literal_uchar_cast(int i):
    """
    >>> index_literal_uchar_cast(0) == ord('1')
    True
    >>> index_literal_uchar_cast(-5) == ord('1')
    True
    >>> index_literal_uchar_cast(2) == ord('3')
    True
    >>> index_literal_uchar_cast(4) == ord('5')
    True
    >>> index_literal_uchar_cast(6)
    Traceback (most recent call last):
    IndexError: string index out of range
    """
    return <unsigned char>(b"12345"[i])
# Implicit coercion: the indexed bytes literal is assigned to a `cdef char`
# variable without a cast.  The tree assertions require a PythonCapiCallNode
# and forbid IndexNode / CoerceFromPyTypeNode; the doctests also check the
# out-of-range IndexError.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//IndexNode",
                                 "//CoerceFromPyTypeNode")
def index_literal_char_coerce(int i):
    """
    >>> index_literal_char_coerce(0) == ord('1')
    True
    >>> index_literal_char_coerce(-5) == ord('1')
    True
    >>> index_literal_char_coerce(2) == ord('3')
    True
    >>> index_literal_char_coerce(4) == ord('5')
    True
    >>> index_literal_char_coerce(6)
    Traceback (most recent call last):
    IndexError: string index out of range
    """
    cdef char result = b"12345"[i]
    return result
# Same implicit char coercion as above, compiled with boundscheck(False).
# Only in-range indices are exercised by the doctests; there is no
# out-of-range case here.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//IndexNode",
                                 "//CoerceFromPyTypeNode")
@cython.boundscheck(False)
def index_literal_char_coerce_no_check(int i):
    """
    >>> index_literal_char_coerce_no_check(0) == ord('1')
    True
    >>> index_literal_char_coerce_no_check(-5) == ord('1')
    True
    >>> index_literal_char_coerce_no_check(2) == ord('3')
    True
    >>> index_literal_char_coerce_no_check(4) == ord('5')
    True
    """
    cdef char result = b"12345"[i]
    return result
tests/run/py_unicode_type.pyx
View file @
f54625ae
# -*- coding: iso-8859-1 -*-
# -*- coding: iso-8859-1 -*-
cimport
cython
cdef
Py_UNICODE
char_ASCII
=
u'A'
cdef
Py_UNICODE
char_ASCII
=
u'A'
cdef
Py_UNICODE
char_KLINGON
=
u'
\
uF8D2
'
cdef
Py_UNICODE
char_KLINGON
=
u'
\
uF8D2
'
...
@@ -15,9 +17,9 @@ def compare_ASCII():
...
@@ -15,9 +17,9 @@ def compare_ASCII():
print
(
char_ASCII
==
u'
\
uF8D2
'
)
print
(
char_ASCII
==
u'
\
uF8D2
'
)
def
compare_
KLINGON
():
def
compare_
klingon
():
"""
"""
>>> compare_
ASCII
()
>>> compare_
klingon
()
True
True
False
False
False
False
...
@@ -41,20 +43,66 @@ def index_literal(int i):
...
@@ -41,20 +43,66 @@ def index_literal(int i):
return
u"12345"
[
i
]
return
u"12345"
[
i
]
def
index_literal_pyunicode
(
int
i
):
@
cython
.
test_assert_path_exists
(
"//PythonCapiCallNode"
)
@
cython
.
test_fail_if_path_exists
(
"//IndexNode"
,
"//CoerceFromPyTypeNode"
)
def
index_literal_pyunicode_cast
(
int
i
):
"""
"""
>>> index_literal_pyunicode(0) == '1'
>>> index_literal_pyunicode
_cast
(0) == '1'
True
True
>>> index_literal_pyunicode(-5) == '1'
>>> index_literal_pyunicode
_cast
(-5) == '1'
True
True
>>> index_literal_pyunicode(2) == '3'
>>> index_literal_pyunicode
_cast
(2) == '3'
True
True
>>> index_literal_pyunicode(4) == '5'
>>> index_literal_pyunicode
_cast
(4) == '5'
True
True
>>> index_literal_pyunicode_cast(6)
Traceback (most recent call last):
IndexError: string index out of range
"""
"""
return
<
Py_UNICODE
>
(
u"12345"
[
i
])
return
<
Py_UNICODE
>
(
u"12345"
[
i
])
# Implicit coercion: the indexed unicode literal is assigned to a
# `cdef Py_UNICODE` variable without a cast.  The tree assertions require a
# PythonCapiCallNode and forbid IndexNode / CoerceFromPyTypeNode; the doctests
# also check the out-of-range IndexError.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//IndexNode",
                                 "//CoerceFromPyTypeNode")
def index_literal_pyunicode_coerce(int i):
    """
    >>> index_literal_pyunicode_coerce(0) == '1'
    True
    >>> index_literal_pyunicode_coerce(-5) == '1'
    True
    >>> index_literal_pyunicode_coerce(2) == '3'
    True
    >>> index_literal_pyunicode_coerce(4) == '5'
    True
    >>> index_literal_pyunicode_coerce(6)
    Traceback (most recent call last):
    IndexError: string index out of range
    """
    cdef Py_UNICODE result = u"12345"[i]
    return result
# Same implicit Py_UNICODE coercion as above, compiled with
# boundscheck(False).  Only in-range indices are exercised by the doctests;
# there is no out-of-range case here.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//IndexNode",
                                 "//CoerceFromPyTypeNode")
@cython.boundscheck(False)
def index_literal_pyunicode_coerce_no_check(int i):
    """
    >>> index_literal_pyunicode_coerce_no_check(0) == '1'
    True
    >>> index_literal_pyunicode_coerce_no_check(-5) == '1'
    True
    >>> index_literal_pyunicode_coerce_no_check(2) == '3'
    True
    >>> index_literal_pyunicode_coerce_no_check(4) == '5'
    True
    """
    cdef Py_UNICODE result = u"12345"[i]
    return result
from
cpython.unicode
cimport
PyUnicode_FromOrdinal
from
cpython.unicode
cimport
PyUnicode_FromOrdinal
import
sys
import
sys
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment