Commit 99eac7ae authored by Stefan Behnel's avatar Stefan Behnel

support charptr.decode() with non-literal encodings and fix some temp usage issues; test case split

--HG--
rename : tests/run/carray_slicing.pyx => tests/run/charptr_decode.pyx
parent 6b408678
......@@ -1312,7 +1312,7 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
else:
if start.type.is_pyobject:
start = start.coerce_to(PyrexTypes.c_py_ssize_t_type, self.env_stack[-1])
if not start.is_simple:
if stop:
start = UtilNodes.LetRefNode(start)
temps.append(start)
string_node = ExprNodes.AddNode(pos=start.pos,
......@@ -1334,7 +1334,7 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
return node
if not stop:
if start or not string_node.is_simple:
if start or not string_node.is_name:
string_node = UtilNodes.LetRefNode(string_node)
temps.append(string_node)
stop = ExprNodes.PythonCapiCallNode(
......@@ -1359,6 +1359,8 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
encoding, encoding_node, error_handling, error_handling_node = parameters
# try to find a specific encoder function
codec_name = None
if encoding is not None:
codec_name = self._find_special_codec_name(encoding)
if codec_name is not None:
decode_function = "PyUnicode_Decode%s" % codec_name
......@@ -1397,22 +1399,24 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
encoding_node = args[1]
if isinstance(encoding_node, ExprNodes.CoerceToPyTypeNode):
encoding_node = encoding_node.arg
if not isinstance(encoding_node, (ExprNodes.UnicodeNode, ExprNodes.StringNode,
if isinstance(encoding_node, (ExprNodes.UnicodeNode, ExprNodes.StringNode,
ExprNodes.BytesNode)):
return None
encoding = encoding_node.value
encoding_node = ExprNodes.BytesNode(encoding_node.pos, value=encoding,
type=PyrexTypes.c_char_ptr_type)
elif encoding_node.type.is_string:
encoding = None
else:
return None
null_node = ExprNodes.NullNode(pos)
if len(args) == 3:
error_handling_node = args[2]
if isinstance(error_handling_node, ExprNodes.CoerceToPyTypeNode):
error_handling_node = error_handling_node.arg
if not isinstance(error_handling_node,
if isinstance(error_handling_node,
(ExprNodes.UnicodeNode, ExprNodes.StringNode,
ExprNodes.BytesNode)):
return None
error_handling = error_handling_node.value
if error_handling == 'strict':
error_handling_node = null_node
......@@ -1420,6 +1424,10 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
error_handling_node = ExprNodes.BytesNode(
error_handling_node.pos, value=error_handling,
type=PyrexTypes.c_char_ptr_type)
elif error_handling_node.type.is_string:
error_handling = None
else:
return None
else:
error_handling = 'strict'
error_handling_node = null_node
......
......@@ -13,61 +13,6 @@ def slice_charptr_end():
"""
return cstring[:1], cstring[:3], cstring[:9]
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode():
"""
>>> print(str(slice_charptr_decode()).replace("u'", "'"))
('a', 'abc', 'abcABCqtp')
"""
return (cstring[:1].decode('UTF-8'),
cstring[:3].decode('UTF-8'),
cstring[:9].decode('UTF-8'))
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_slice2():
"""
>>> print(str(slice_charptr_decode_slice2()).replace("u'", "'"))
('a', 'bc', 'tp')
"""
return (cstring[0:1].decode('UTF-8'),
cstring[1:3].decode('UTF-8'),
cstring[7:9].decode('UTF-8'))
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_strlen():
"""
>>> print(str(slice_charptr_decode_strlen()).replace("u'", "'"))
('abcABCqtp', 'bcABCqtp', '')
"""
return (cstring.decode('UTF-8'),
cstring[1:].decode('UTF-8'),
cstring[9:].decode('UTF-8'))
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_unbound():
"""
>>> print(str(slice_charptr_decode_unbound()).replace("u'", "'"))
('a', 'abc', 'abcABCqtp')
"""
return (bytes.decode(cstring[:1], 'UTF-8'),
bytes.decode(cstring[:3], 'UTF-8', 'replace'),
bytes.decode(cstring[:9], 'UTF-8'))
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_errormode():
"""
>>> print(str(slice_charptr_decode_errormode()).replace("u'", "'"))
('a', 'abc', 'abcABCqtp')
"""
return (cstring[:1].decode('UTF-8', 'strict'),
cstring[:3].decode('UTF-8', 'replace'),
cstring[:9].decode('UTF-8', 'unicode_escape'))
@cython.test_assert_path_exists("//ForFromStatNode",
"//ForFromStatNode//SliceIndexNode")
@cython.test_fail_if_path_exists("//ForInStatNode")
......@@ -117,7 +62,7 @@ def slice_charptr_for_loop_c():
@cython.test_fail_if_path_exists("//ForInStatNode")
def slice_charptr_for_loop_c_dynamic_bounds():
"""
>>> slice_charptr_for_loop_c()
>>> slice_charptr_for_loop_c_dynamic_bounds()
['a', 'b', 'c']
['b', 'c', 'A', 'B']
['B', 'C', 'q', 't', 'p']
......
cimport cython
############################################################
# tests for char* slicing
cdef char* cstring = "abcABCqtp"
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode():
"""
>>> print(str(slice_charptr_decode()).replace("u'", "'"))
('a', 'abc', 'abcABCqtp')
"""
return (cstring[:1].decode('UTF-8'),
cstring[:3].decode('UTF-8'),
cstring[:9].decode('UTF-8'))
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_unknown_encoding():
"""
>>> print(str(slice_charptr_decode_unknown_encoding()).replace("u'", "'"))
('abcABCqtp', 'abcABCqtp', 'abc', 'abcABCqt')
"""
cdef char* enc = 'UTF-8'
cdef char* error_handling = 'strict'
return (cstring.decode(enc),
cstring.decode(enc, error_handling),
cstring[:3].decode(enc),
cstring[:8].decode(enc, error_handling))
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_slice2():
"""
>>> print(str(slice_charptr_decode_slice2()).replace("u'", "'"))
('a', 'bc', 'tp')
"""
return (cstring[0:1].decode('UTF-8'),
cstring[1:3].decode('UTF-8'),
cstring[7:9].decode('UTF-8'))
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_strlen():
"""
>>> print(str(slice_charptr_decode_strlen()).replace("u'", "'"))
('abcABCqtp', 'bcABCqtp', '')
"""
return (cstring.decode('UTF-8'),
cstring[1:].decode('UTF-8'),
cstring[9:].decode('UTF-8'))
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_unbound():
"""
>>> print(str(slice_charptr_decode_unbound()).replace("u'", "'"))
('a', 'abc', 'abcABCqtp')
"""
return (bytes.decode(cstring[:1], 'UTF-8'),
bytes.decode(cstring[:3], 'UTF-8', 'replace'),
bytes.decode(cstring[:9], 'UTF-8'))
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_errormode():
"""
>>> print(str(slice_charptr_decode_errormode()).replace("u'", "'"))
('a', 'abc', 'abcABCqtp')
"""
return (cstring[:1].decode('UTF-8', 'strict'),
cstring[:3].decode('UTF-8', 'replace'),
cstring[:9].decode('UTF-8', 'unicode_escape'))
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_dynamic_bounds():
"""
>>> print(str(slice_charptr_dynamic_bounds()).replace("u'", "'"))
('abc', 'abc', 'bcAB', 'BCqtp')
"""
return (cstring[:return3()].decode('UTF-8'),
cstring[0:return3()].decode('UTF-8'),
cstring[return1():return5()].decode('UTF-8'),
cstring[return4():return9()].decode('UTF-8'))
cdef return1(): return 1
cdef return3(): return 3
cdef return4(): return 4
cdef return5(): return 5
cdef return9(): return 9
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment