Commit 1aeb2d9b authored by Stefan Behnel's avatar Stefan Behnel

prevent redundant coercion from Py_UNICODE to a unicode string when the...

prevent redundant coercion from Py_UNICODE to a unicode string when the subscript index is a Python object - this case is no longer optimised
parent c71cde5f
......@@ -1897,9 +1897,14 @@ class IndexNode(ExprNode):
base_type = self.base.infer_type(env)
if base_type.is_ptr or base_type.is_array:
return base_type.base_type
elif base_type is Builtin.unicode_type:
elif base_type is Builtin.unicode_type and self.index.infer_type(env).is_int:
# Py_UNICODE will automatically coerce to a unicode string
# if required, so this is safe
# if required, so this is safe. We only infer Py_UNICODE
# when the index is a C integer type. Otherwise, we may
# need to use normal Python item access, in which case
# it's faster to return the one-char unicode string than
# to receive it, throw it away, and potentially rebuild it
# on a subsequent PyObject coercion.
return PyrexTypes.c_py_unicode_type
else:
# TODO: Handle buffers (hopefully without too much redundancy).
......@@ -1988,9 +1993,9 @@ class IndexNode(ExprNode):
else:
self.index = self.index.coerce_to_pyobject(env)
self.is_temp = 1
if base_type is unicode_type:
if self.index.type.is_int and base_type is unicode_type:
# Py_UNICODE will automatically coerce to a unicode string
# if required, so this is safe
# if required, so this is fast and safe
self.type = PyrexTypes.c_py_unicode_type
else:
self.type = py_object_type
......@@ -2128,13 +2133,10 @@ class IndexNode(ExprNode):
code.error_goto(self.pos)))
code.put_gotref(self.py_result())
elif self.type is PyrexTypes.c_py_unicode_type and self.base.type is unicode_type:
code.globalstate.use_utility_code(getitem_int_pyunicode_utility_code)
if self.index.type.is_int:
assert self.index.type.is_int
index_code = self.index.result()
function = "__Pyx_GetItemInt_Unicode"
else:
index_code = self.index.py_result()
function = "__Pyx_GetItemInt_Unicode_Generic"
code.globalstate.use_utility_code(getitem_int_pyunicode_utility_code)
code.putln(
"%s = %s(%s, %s%s); if (unlikely(%s == (Py_UNICODE)-1)) %s;" % (
self.result(),
......@@ -6784,18 +6786,17 @@ static CYTHON_INLINE Py_UNICODE __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring,
}
static CYTHON_INLINE Py_UNICODE __Pyx_GetItemInt_Unicode_Generic(PyObject* ustring, PyObject* j) {
PyObject *r;
Py_UNICODE uchar;
PyObject *uchar_string;
if (!j) return (Py_UNICODE)-1;
r = PyObject_GetItem(ustring, j);
uchar_string = PyObject_GetItem(ustring, j);
Py_DECREF(j);
if (!r) return (Py_UNICODE)-1;
uchar = PyUnicode_AS_UNICODE(r)[0];
Py_DECREF(r);
if (!uchar_string) return (Py_UNICODE)-1;
uchar = PyUnicode_AS_UNICODE(uchar_string)[0];
Py_DECREF(uchar_string);
return uchar;
}
''',
)
''')
getitem_int_utility_code = UtilityCode(
proto = """
......
......@@ -26,6 +26,26 @@ def index(unicode ustring, Py_ssize_t i):
return ustring[i]
@cython.test_assert_path_exists("//IndexNode")
@cython.test_fail_if_path_exists("//CoerceToPyTypeNode")
def index_pyindex(unicode ustring, i):
"""
>>> index(ustring, 0) == 'a'
True
>>> index(ustring, 2) == 'e'
True
>>> index(ustring, -1) == '6'
True
>>> index(ustring, -len(ustring)) == 'a'
True
>>> index(ustring, len(ustring))
Traceback (most recent call last):
IndexError: string index out of range
"""
return ustring[i]
@cython.test_assert_path_exists("//CoerceToPyTypeNode",
"//IndexNode")
......@@ -219,3 +239,33 @@ def index_add(unicode ustring, Py_ssize_t i, Py_ssize_t j):
True
"""
return ustring[i] + ustring[j]
@cython.test_assert_path_exists("//CoerceToPyTypeNode",
"//IndexNode",
"//InPlaceAssignmentNode",
"//CoerceToPyTypeNode//IndexNode")
@cython.test_fail_if_path_exists("//IndexNode//CoerceToPyTypeNode")
def index_concat_loop(unicode ustring):
"""
>>> index_concat_loop(ustring) == ustring
True
"""
cdef int i
cdef unicode s = u''
for i in range(len(ustring)):
s += ustring[i]
return s
@cython.test_assert_path_exists("//CoerceToPyTypeNode",
"//IndexNode",
"//CoerceToPyTypeNode//IndexNode")
@cython.test_fail_if_path_exists("//IndexNode//CoerceToPyTypeNode")
def index_join_loop(unicode ustring):
"""
>>> index_join_loop(ustring) == ustring
True
"""
cdef int i
return u''.join([ ustring[i] for i in range(len(ustring)) ])
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment