Commit 4b64bbe1 authored by Stefan Behnel

De-optimise the unicode methods ".upper()", ".lower()" and ".title()" on...

De-optimise the unicode methods ".upper()", ".lower()" and ".title()" on single character values since they must still be able to return multiple characters.
parent 8103d055
......@@ -20,6 +20,11 @@ Features added
Bugs fixed
----------
* The unicode methods ``.upper()``, ``.lower()`` and ``.title()`` were
incorrectly optimised for single-character input values and returned only
the first character when the result should have contained several characters.
They now use the original Python methods again.
* The ``Py_hash_t`` type failed to accept arbitrary "index" values.
(Github issue #2752)
......
......@@ -3374,6 +3374,8 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_ucs4_type, None),
])
# DISABLED: The optimised call can only return a single character, but these
# methods may need to return several (e.g. U+0130 lowercases to u'i\u0307').
'''
def _inject_unicode_character_conversion(self, node, function, args, is_unbound_method):
if is_unbound_method or len(args) != 1:
return node
......@@ -3392,9 +3394,10 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
func_call = func_call.coerce_to_pyobject(self.current_env)
return func_call
_handle_simple_method_unicode_lower = _inject_unicode_character_conversion
_handle_simple_method_unicode_upper = _inject_unicode_character_conversion
_handle_simple_method_unicode_title = _inject_unicode_character_conversion
#_handle_simple_method_unicode_lower = _inject_unicode_character_conversion
#_handle_simple_method_unicode_upper = _inject_unicode_character_conversion
#_handle_simple_method_unicode_title = _inject_unicode_character_conversion
'''
PyUnicode_Splitlines_func_type = PyrexTypes.CFuncType(
Builtin.list_type, [
......
......@@ -132,15 +132,24 @@ def unicode_type_methods(Py_UCS4 uchar):
uchar.isupper(),
]
@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode')
#@cython.test_assert_path_exists('//PythonCapiCallNode')
#@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_methods(Py_UCS4 uchar):
"""
>>> unicode_methods(ord('A')) == ['a', 'A', 'A']
>>> unicode_methods(ord('A')) == ['a', 'A', 'A'] or unicode_methods(ord('A'))
True
>>> unicode_methods(ord('a')) == ['a', 'A', 'A'] or unicode_methods(ord('a'))
True
>>> unicode_methods(ord('a')) == ['a', 'A', 'A']
>>> unicode_methods(0x1E9E) == [u'\\xdf', u'\\u1e9e', u'\\u1e9e'] or unicode_methods(0x1E9E)
True
>>> unicode_methods(0x0130) in (
... [u'i\\u0307', u'\\u0130', u'\\u0130'], # Py3
... [u'i', u'\\u0130', u'\\u0130'], # Py2
... ) or unicode_methods(0x0130)
True
"""
# \u1E9E == 'LATIN CAPITAL LETTER SHARP S'
# \u0130 == 'LATIN CAPITAL LETTER I WITH DOT ABOVE'
return [
# character conversion
uchar.lower(),
......@@ -149,11 +158,11 @@ def unicode_methods(Py_UCS4 uchar):
]
@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists(
'//SimpleCallNode',
'//CoerceFromPyTypeNode',
)
#@cython.test_assert_path_exists('//PythonCapiCallNode')
#@cython.test_fail_if_path_exists(
# '//SimpleCallNode',
# '//CoerceFromPyTypeNode',
#)
def unicode_method_return_type(Py_UCS4 uchar):
"""
>>> unicode_method_return_type(ord('A'))
......@@ -366,5 +375,5 @@ def uchar_lookup_in_dict(obj, Py_UCS4 uchar):
_WARNINGS = """
364:16: Item lookup of unicode character codes now always converts to a Unicode string. Use an explicit C integer cast to get back the previous integer lookup behaviour.
373:16: Item lookup of unicode character codes now always converts to a Unicode string. Use an explicit C integer cast to get back the previous integer lookup behaviour.
"""
......@@ -123,8 +123,8 @@ def unicode_type_methods(Py_UNICODE uchar):
uchar.isupper(),
]
@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode')
#@cython.test_assert_path_exists('//PythonCapiCallNode')
#@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_methods(Py_UNICODE uchar):
"""
>>> unicode_methods(ord('A')) == ['a', 'A', 'A']
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment