Commit 4b64bbe1 authored by Stefan Behnel

De-optimise the unicode methods ".upper()", ".lower()" and ".title()" on...

De-optimise the unicode methods ".upper()", ".lower()" and ".title()" on single character values since they must still be able to return multiple characters.
parent 8103d055
...@@ -20,6 +20,11 @@ Features added ...@@ -20,6 +20,11 @@ Features added
Bugs fixed Bugs fixed
---------- ----------
* The unicode methods ``.upper()``, ``.lower()`` and ``.title()`` were
incorrectly optimised for single character input values and only returned
the first character if multiple characters should have been returned.
They now use the original Python methods again.
* The ``Py_hash_t`` type failed to accept arbitrary "index" values. * The ``Py_hash_t`` type failed to accept arbitrary "index" values.
(Github issue #2752) (Github issue #2752)
......
...@@ -3374,6 +3374,8 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, ...@@ -3374,6 +3374,8 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_ucs4_type, None), PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_ucs4_type, None),
]) ])
# DISABLED: Return value can only be one character, which is not correct.
'''
def _inject_unicode_character_conversion(self, node, function, args, is_unbound_method): def _inject_unicode_character_conversion(self, node, function, args, is_unbound_method):
if is_unbound_method or len(args) != 1: if is_unbound_method or len(args) != 1:
return node return node
...@@ -3392,9 +3394,10 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, ...@@ -3392,9 +3394,10 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
func_call = func_call.coerce_to_pyobject(self.current_env) func_call = func_call.coerce_to_pyobject(self.current_env)
return func_call return func_call
_handle_simple_method_unicode_lower = _inject_unicode_character_conversion #_handle_simple_method_unicode_lower = _inject_unicode_character_conversion
_handle_simple_method_unicode_upper = _inject_unicode_character_conversion #_handle_simple_method_unicode_upper = _inject_unicode_character_conversion
_handle_simple_method_unicode_title = _inject_unicode_character_conversion #_handle_simple_method_unicode_title = _inject_unicode_character_conversion
'''
PyUnicode_Splitlines_func_type = PyrexTypes.CFuncType( PyUnicode_Splitlines_func_type = PyrexTypes.CFuncType(
Builtin.list_type, [ Builtin.list_type, [
......
...@@ -132,15 +132,24 @@ def unicode_type_methods(Py_UCS4 uchar): ...@@ -132,15 +132,24 @@ def unicode_type_methods(Py_UCS4 uchar):
uchar.isupper(), uchar.isupper(),
] ]
@cython.test_assert_path_exists('//PythonCapiCallNode') #@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode') #@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_methods(Py_UCS4 uchar): def unicode_methods(Py_UCS4 uchar):
""" """
>>> unicode_methods(ord('A')) == ['a', 'A', 'A'] >>> unicode_methods(ord('A')) == ['a', 'A', 'A'] or unicode_methods(ord('A'))
True
>>> unicode_methods(ord('a')) == ['a', 'A', 'A'] or unicode_methods(ord('a'))
True True
>>> unicode_methods(ord('a')) == ['a', 'A', 'A'] >>> unicode_methods(0x1E9E) == [u'\\xdf', u'\\u1e9e', u'\\u1e9e'] or unicode_methods(0x1E9E)
True
>>> unicode_methods(0x0130) in (
... [u'i\\u0307', u'\\u0130', u'\\u0130'], # Py3
... [u'i', u'\\u0130', u'\\u0130'], # Py2
... ) or unicode_methods(0x0130)
True True
""" """
# \u1E9E == 'LATIN CAPITAL LETTER SHARP S'
# \u0130 == 'LATIN CAPITAL LETTER I WITH DOT ABOVE'
return [ return [
# character conversion # character conversion
uchar.lower(), uchar.lower(),
...@@ -149,11 +158,11 @@ def unicode_methods(Py_UCS4 uchar): ...@@ -149,11 +158,11 @@ def unicode_methods(Py_UCS4 uchar):
] ]
@cython.test_assert_path_exists('//PythonCapiCallNode') #@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists( #@cython.test_fail_if_path_exists(
'//SimpleCallNode', # '//SimpleCallNode',
'//CoerceFromPyTypeNode', # '//CoerceFromPyTypeNode',
) #)
def unicode_method_return_type(Py_UCS4 uchar): def unicode_method_return_type(Py_UCS4 uchar):
""" """
>>> unicode_method_return_type(ord('A')) >>> unicode_method_return_type(ord('A'))
...@@ -366,5 +375,5 @@ def uchar_lookup_in_dict(obj, Py_UCS4 uchar): ...@@ -366,5 +375,5 @@ def uchar_lookup_in_dict(obj, Py_UCS4 uchar):
_WARNINGS = """ _WARNINGS = """
364:16: Item lookup of unicode character codes now always converts to a Unicode string. Use an explicit C integer cast to get back the previous integer lookup behaviour. 373:16: Item lookup of unicode character codes now always converts to a Unicode string. Use an explicit C integer cast to get back the previous integer lookup behaviour.
""" """
...@@ -123,8 +123,8 @@ def unicode_type_methods(Py_UNICODE uchar): ...@@ -123,8 +123,8 @@ def unicode_type_methods(Py_UNICODE uchar):
uchar.isupper(), uchar.isupper(),
] ]
@cython.test_assert_path_exists('//PythonCapiCallNode') #@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode') #@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_methods(Py_UNICODE uchar): def unicode_methods(Py_UNICODE uchar):
""" """
>>> unicode_methods(ord('A')) == ['a', 'A', 'A'] >>> unicode_methods(ord('A')) == ['a', 'A', 'A']
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment