Commit 1aeb2d9b authored by Stefan Behnel

prevent redundant coercion from Py_UNICODE to a unicode string when the subscript index is a Python object

prevent redundant coercion from Py_UNICODE to a unicode string when the subscript index is a Python object - this case is no longer optimised
parent c71cde5f
...@@ -1897,9 +1897,14 @@ class IndexNode(ExprNode): ...@@ -1897,9 +1897,14 @@ class IndexNode(ExprNode):
base_type = self.base.infer_type(env) base_type = self.base.infer_type(env)
if base_type.is_ptr or base_type.is_array: if base_type.is_ptr or base_type.is_array:
return base_type.base_type return base_type.base_type
elif base_type is Builtin.unicode_type: elif base_type is Builtin.unicode_type and self.index.infer_type(env).is_int:
# Py_UNICODE will automatically coerce to a unicode string # Py_UNICODE will automatically coerce to a unicode string
# if required, so this is safe # if required, so this is safe. We only infer Py_UNICODE
# when the index is a C integer type. Otherwise, we may
# need to use normal Python item access, in which case
# it's faster to return the one-char unicode string than
# to receive it, throw it away, and potentially rebuild it
# on a subsequent PyObject coercion.
return PyrexTypes.c_py_unicode_type return PyrexTypes.c_py_unicode_type
else: else:
# TODO: Handle buffers (hopefully without too much redundancy). # TODO: Handle buffers (hopefully without too much redundancy).
...@@ -1988,9 +1993,9 @@ class IndexNode(ExprNode): ...@@ -1988,9 +1993,9 @@ class IndexNode(ExprNode):
else: else:
self.index = self.index.coerce_to_pyobject(env) self.index = self.index.coerce_to_pyobject(env)
self.is_temp = 1 self.is_temp = 1
if base_type is unicode_type: if self.index.type.is_int and base_type is unicode_type:
# Py_UNICODE will automatically coerce to a unicode string # Py_UNICODE will automatically coerce to a unicode string
# if required, so this is safe # if required, so this is fast and safe
self.type = PyrexTypes.c_py_unicode_type self.type = PyrexTypes.c_py_unicode_type
else: else:
self.type = py_object_type self.type = py_object_type
...@@ -2128,13 +2133,10 @@ class IndexNode(ExprNode): ...@@ -2128,13 +2133,10 @@ class IndexNode(ExprNode):
code.error_goto(self.pos))) code.error_goto(self.pos)))
code.put_gotref(self.py_result()) code.put_gotref(self.py_result())
elif self.type is PyrexTypes.c_py_unicode_type and self.base.type is unicode_type: elif self.type is PyrexTypes.c_py_unicode_type and self.base.type is unicode_type:
code.globalstate.use_utility_code(getitem_int_pyunicode_utility_code) assert self.index.type.is_int
if self.index.type.is_int:
index_code = self.index.result() index_code = self.index.result()
function = "__Pyx_GetItemInt_Unicode" function = "__Pyx_GetItemInt_Unicode"
else: code.globalstate.use_utility_code(getitem_int_pyunicode_utility_code)
index_code = self.index.py_result()
function = "__Pyx_GetItemInt_Unicode_Generic"
code.putln( code.putln(
"%s = %s(%s, %s%s); if (unlikely(%s == (Py_UNICODE)-1)) %s;" % ( "%s = %s(%s, %s%s); if (unlikely(%s == (Py_UNICODE)-1)) %s;" % (
self.result(), self.result(),
...@@ -6784,18 +6786,17 @@ static CYTHON_INLINE Py_UNICODE __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring, ...@@ -6784,18 +6786,17 @@ static CYTHON_INLINE Py_UNICODE __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring,
} }
static CYTHON_INLINE Py_UNICODE __Pyx_GetItemInt_Unicode_Generic(PyObject* ustring, PyObject* j) { static CYTHON_INLINE Py_UNICODE __Pyx_GetItemInt_Unicode_Generic(PyObject* ustring, PyObject* j) {
PyObject *r;
Py_UNICODE uchar; Py_UNICODE uchar;
PyObject *uchar_string;
if (!j) return (Py_UNICODE)-1; if (!j) return (Py_UNICODE)-1;
r = PyObject_GetItem(ustring, j); uchar_string = PyObject_GetItem(ustring, j);
Py_DECREF(j); Py_DECREF(j);
if (!r) return (Py_UNICODE)-1; if (!uchar_string) return (Py_UNICODE)-1;
uchar = PyUnicode_AS_UNICODE(r)[0]; uchar = PyUnicode_AS_UNICODE(uchar_string)[0];
Py_DECREF(r); Py_DECREF(uchar_string);
return uchar; return uchar;
} }
''', ''')
)
getitem_int_utility_code = UtilityCode( getitem_int_utility_code = UtilityCode(
proto = """ proto = """
......
...@@ -26,6 +26,26 @@ def index(unicode ustring, Py_ssize_t i): ...@@ -26,6 +26,26 @@ def index(unicode ustring, Py_ssize_t i):
return ustring[i] return ustring[i]
# The index is an untyped Python object here, so the subscript is expected to
# stay a plain Python item access: the tree must contain an IndexNode but no
# CoerceToPyTypeNode (i.e. no Py_UNICODE result that is coerced back to an
# object afterwards).
@cython.test_assert_path_exists("//IndexNode")
@cython.test_fail_if_path_exists("//CoerceToPyTypeNode")
def index_pyindex(unicode ustring, i):
    """
    Return ``ustring[i]`` where the index ``i`` is a Python object.

    The doctests call this function itself (not the C-integer ``index()``
    variant), so that the Python-object index path is what actually runs.

    >>> index_pyindex(ustring, 0) == 'a'
    True
    >>> index_pyindex(ustring, 2) == 'e'
    True
    >>> index_pyindex(ustring, -1) == '6'
    True
    >>> index_pyindex(ustring, -len(ustring)) == 'a'
    True
    >>> index_pyindex(ustring, len(ustring))
    Traceback (most recent call last):
    IndexError: string index out of range
    """
    return ustring[i]
@cython.test_assert_path_exists("//CoerceToPyTypeNode", @cython.test_assert_path_exists("//CoerceToPyTypeNode",
"//IndexNode") "//IndexNode")
...@@ -219,3 +239,33 @@ def index_add(unicode ustring, Py_ssize_t i, Py_ssize_t j): ...@@ -219,3 +239,33 @@ def index_add(unicode ustring, Py_ssize_t i, Py_ssize_t j):
True True
""" """
return ustring[i] + ustring[j] return ustring[i] + ustring[j]
# Tree assertions for in-place concatenation of indexed characters:
# the tree must contain an in-place assignment and an IndexNode that sits
# below a CoerceToPyTypeNode (the indexing result is coerced to an object),
# while no CoerceToPyTypeNode may appear below the IndexNode itself
# (presumably: the C integer index is not boxed into a Python object).
@cython.test_assert_path_exists("//CoerceToPyTypeNode",
"//IndexNode",
"//InPlaceAssignmentNode",
"//CoerceToPyTypeNode//IndexNode")
@cython.test_fail_if_path_exists("//IndexNode//CoerceToPyTypeNode")
def index_concat_loop(unicode ustring):
"""
>>> index_concat_loop(ustring) == ustring
True
"""
# C integer loop variable, used directly as the subscript index.
cdef int i
cdef unicode s = u''
# Rebuild the string one character at a time; the result must equal the input.
for i in range(len(ustring)):
s += ustring[i]
return s
# Tree assertions for collecting indexed characters into a list:
# an IndexNode must appear below a CoerceToPyTypeNode (the indexing result is
# coerced to an object), and no CoerceToPyTypeNode may appear below the
# IndexNode itself (presumably: the C integer index is not boxed).
@cython.test_assert_path_exists("//CoerceToPyTypeNode",
"//IndexNode",
"//CoerceToPyTypeNode//IndexNode")
@cython.test_fail_if_path_exists("//IndexNode//CoerceToPyTypeNode")
def index_join_loop(unicode ustring):
"""
>>> index_join_loop(ustring) == ustring
True
"""
# C integer loop variable, used directly as the subscript index.
cdef int i
# Index every position and join the characters; the result must equal the input.
return u''.join([ ustring[i] for i in range(len(ustring)) ])
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment