Commit 786e37f7 authored by Stefan Behnel

Always convert Py_UNICODE and Py_UCS4 values to Unicode strings when used for item access.

Issue a warning as this is a behavioural change that can lead to subtle code breakage of currently working code, in the same way that the previous behaviour could easily lead to bugs in general.

Resolves #1602.
parent e4b8a88d
...@@ -3466,7 +3466,13 @@ class IndexNode(_IndexingBaseNode): ...@@ -3466,7 +3466,13 @@ class IndexNode(_IndexingBaseNode):
def analyse_as_pyobject(self, env, is_slice, getting, setting): def analyse_as_pyobject(self, env, is_slice, getting, setting):
base_type = self.base.type base_type = self.base.type
if self.index.type.is_int and base_type is not dict_type: if self.index.type.is_unicode_char:
warning(self.pos,
"Item lookup of unicode character codes now always converts to a Unicode string. "
"Use an explicit C integer cast to get back the previous integer lookup behaviour.", level=1)
self.index = self.index.coerce_to_pyobject(env)
self.is_temp = 1
elif self.index.type.is_int and base_type is not dict_type:
if (getting if (getting
and (base_type in (list_type, tuple_type, bytearray_type)) and (base_type in (list_type, tuple_type, bytearray_type))
and (not self.index.type.signed and (not self.index.type.signed
......
...@@ -342,3 +342,21 @@ def uchar_in(Py_UCS4 uchar, unicode ustring): ...@@ -342,3 +342,21 @@ def uchar_in(Py_UCS4 uchar, unicode ustring):
""" """
assert uchar == 0x12345, ('%X' % uchar) assert uchar == 0x12345, ('%X' % uchar)
return uchar in ustring return uchar in ustring
def uchar_lookup_in_dict(obj, Py_UCS4 uchar):
    """
    Look up a Py_UCS4 character as a key through two differently typed bases.

    The same key is used to index ``obj`` once via a ``cdef dict`` reference
    and once via the untyped argument itself.  Returns the tuple
    ``(dval, objval)``; the doctests below expect both lookups to find the
    same entry.

    >>> d = {high_uchar: 12345, u0: 0, u1: 1, u_A: 2}
    >>> uchar_lookup_in_dict(d, high_uchar)
    (12345, 12345)
    >>> uchar_lookup_in_dict(d, u_A)
    (2, 2)
    >>> uchar_lookup_in_dict(d, u0)
    (0, 0)
    >>> uchar_lookup_in_dict(d, u1)
    (1, 1)
    """
    # Bind the argument to a dict-typed variable so the first lookup is
    # compiled against a statically known dict base.
    cdef dict d = obj
    # Item access with a Py_UCS4 index on the typed dict.
    dval = d[uchar]
    # Same index on the untyped object base.
    # NOTE(review): presumably this is the case that previously used an
    # integer index instead of a Unicode string key -- confirm against the
    # compiler change this test accompanies.
    objval = obj[uchar]
    return dval, objval
...@@ -229,3 +229,21 @@ def index_and_in(): ...@@ -229,3 +229,21 @@ def index_and_in():
for i in range(1,9): for i in range(1,9):
if u'abcdefgh'[-i] in u'abCDefGh': if u'abcdefgh'[-i] in u'abCDefGh':
print i print i
def uchar_lookup_in_dict(obj, Py_UNICODE uchar):
    """
    Look up a Py_UNICODE character as a key through two differently typed bases.

    The same key is used to index ``obj`` once via a ``cdef dict`` reference
    and once via the untyped argument itself.  Returns the tuple
    ``(dval, objval)``; the doctests below expect both lookups to find the
    same entry.

    >>> d = {u_KLINGON: 1234, u0: 0, u1: 1, u_A: 2}
    >>> uchar_lookup_in_dict(d, u_KLINGON)
    (1234, 1234)
    >>> uchar_lookup_in_dict(d, u_A)
    (2, 2)
    >>> uchar_lookup_in_dict(d, u0)
    (0, 0)
    >>> uchar_lookup_in_dict(d, u1)
    (1, 1)
    """
    # Bind the argument to a dict-typed variable so the first lookup is
    # compiled against a statically known dict base.
    cdef dict d = obj
    # Item access with a Py_UNICODE index on the typed dict.
    dval = d[uchar]
    # Same index on the untyped object base.
    # NOTE(review): presumably this is the case that previously used an
    # integer index instead of a Unicode string key -- confirm against the
    # compiler change this test accompanies.
    objval = obj[uchar]
    return dval, objval
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment