Commit 29945fa6, authored by Stefan Behnel

for indexing into / looping over unicode strings, infer Py_UCS4 instead of Py_UNICODE

parent ac871a9e
...@@ -2066,14 +2066,14 @@ class IndexNode(ExprNode): ...@@ -2066,14 +2066,14 @@ class IndexNode(ExprNode):
if index_type and index_type.is_int or isinstance(self.index, (IntNode, LongNode)): if index_type and index_type.is_int or isinstance(self.index, (IntNode, LongNode)):
# indexing! # indexing!
if base_type is unicode_type: if base_type is unicode_type:
# Py_UNICODE will automatically coerce to a unicode string # Py_UCS4 will automatically coerce to a unicode string
# if required, so this is safe. We only infer Py_UNICODE # if required, so this is safe. We only infer Py_UCS4
# when the index is a C integer type. Otherwise, we may # when the index is a C integer type. Otherwise, we may
# need to use normal Python item access, in which case # need to use normal Python item access, in which case
# it's faster to return the one-char unicode string than # it's faster to return the one-char unicode string than
# to receive it, throw it away, and potentially rebuild it # to receive it, throw it away, and potentially rebuild it
# on a subsequent PyObject coercion. # on a subsequent PyObject coercion.
return PyrexTypes.c_py_unicode_type return PyrexTypes.c_py_ucs4_type
elif isinstance(self.base, BytesNode): elif isinstance(self.base, BytesNode):
#if env.global_scope().context.language_level >= 3: #if env.global_scope().context.language_level >= 3:
# # infering 'char' can be made to work in Python 3 mode # # infering 'char' can be made to work in Python 3 mode
...@@ -2193,7 +2193,7 @@ class IndexNode(ExprNode): ...@@ -2193,7 +2193,7 @@ class IndexNode(ExprNode):
if self.index.type.is_int and base_type is unicode_type: if self.index.type.is_int and base_type is unicode_type:
# Py_UNICODE/Py_UCS4 will automatically coerce to a unicode string # Py_UNICODE/Py_UCS4 will automatically coerce to a unicode string
# if required, so this is fast and safe # if required, so this is fast and safe
self.type = PyrexTypes.c_py_unicode_type self.type = PyrexTypes.c_py_ucs4_type
elif is_slice and base_type in (bytes_type, str_type, unicode_type, list_type, tuple_type): elif is_slice and base_type in (bytes_type, str_type, unicode_type, list_type, tuple_type):
self.type = base_type self.type = base_type
else: else:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment