Commit 99eac7ae authored by Stefan Behnel

support charptr.decode() with non-literal encodings and fix some temp usage issues; test case split

--HG--
rename : tests/run/carray_slicing.pyx => tests/run/charptr_decode.pyx
parent 6b408678
...@@ -1312,7 +1312,7 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform): ...@@ -1312,7 +1312,7 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
else: else:
if start.type.is_pyobject: if start.type.is_pyobject:
start = start.coerce_to(PyrexTypes.c_py_ssize_t_type, self.env_stack[-1]) start = start.coerce_to(PyrexTypes.c_py_ssize_t_type, self.env_stack[-1])
if not start.is_simple: if stop:
start = UtilNodes.LetRefNode(start) start = UtilNodes.LetRefNode(start)
temps.append(start) temps.append(start)
string_node = ExprNodes.AddNode(pos=start.pos, string_node = ExprNodes.AddNode(pos=start.pos,
...@@ -1334,7 +1334,7 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform): ...@@ -1334,7 +1334,7 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
return node return node
if not stop: if not stop:
if start or not string_node.is_simple: if start or not string_node.is_name:
string_node = UtilNodes.LetRefNode(string_node) string_node = UtilNodes.LetRefNode(string_node)
temps.append(string_node) temps.append(string_node)
stop = ExprNodes.PythonCapiCallNode( stop = ExprNodes.PythonCapiCallNode(
...@@ -1359,6 +1359,8 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform): ...@@ -1359,6 +1359,8 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
encoding, encoding_node, error_handling, error_handling_node = parameters encoding, encoding_node, error_handling, error_handling_node = parameters
# try to find a specific encoder function # try to find a specific encoder function
codec_name = None
if encoding is not None:
codec_name = self._find_special_codec_name(encoding) codec_name = self._find_special_codec_name(encoding)
if codec_name is not None: if codec_name is not None:
decode_function = "PyUnicode_Decode%s" % codec_name decode_function = "PyUnicode_Decode%s" % codec_name
...@@ -1397,22 +1399,24 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform): ...@@ -1397,22 +1399,24 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
encoding_node = args[1] encoding_node = args[1]
if isinstance(encoding_node, ExprNodes.CoerceToPyTypeNode): if isinstance(encoding_node, ExprNodes.CoerceToPyTypeNode):
encoding_node = encoding_node.arg encoding_node = encoding_node.arg
if not isinstance(encoding_node, (ExprNodes.UnicodeNode, ExprNodes.StringNode, if isinstance(encoding_node, (ExprNodes.UnicodeNode, ExprNodes.StringNode,
ExprNodes.BytesNode)): ExprNodes.BytesNode)):
return None
encoding = encoding_node.value encoding = encoding_node.value
encoding_node = ExprNodes.BytesNode(encoding_node.pos, value=encoding, encoding_node = ExprNodes.BytesNode(encoding_node.pos, value=encoding,
type=PyrexTypes.c_char_ptr_type) type=PyrexTypes.c_char_ptr_type)
elif encoding_node.type.is_string:
encoding = None
else:
return None
null_node = ExprNodes.NullNode(pos) null_node = ExprNodes.NullNode(pos)
if len(args) == 3: if len(args) == 3:
error_handling_node = args[2] error_handling_node = args[2]
if isinstance(error_handling_node, ExprNodes.CoerceToPyTypeNode): if isinstance(error_handling_node, ExprNodes.CoerceToPyTypeNode):
error_handling_node = error_handling_node.arg error_handling_node = error_handling_node.arg
if not isinstance(error_handling_node, if isinstance(error_handling_node,
(ExprNodes.UnicodeNode, ExprNodes.StringNode, (ExprNodes.UnicodeNode, ExprNodes.StringNode,
ExprNodes.BytesNode)): ExprNodes.BytesNode)):
return None
error_handling = error_handling_node.value error_handling = error_handling_node.value
if error_handling == 'strict': if error_handling == 'strict':
error_handling_node = null_node error_handling_node = null_node
...@@ -1420,6 +1424,10 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform): ...@@ -1420,6 +1424,10 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
error_handling_node = ExprNodes.BytesNode( error_handling_node = ExprNodes.BytesNode(
error_handling_node.pos, value=error_handling, error_handling_node.pos, value=error_handling,
type=PyrexTypes.c_char_ptr_type) type=PyrexTypes.c_char_ptr_type)
elif error_handling_node.type.is_string:
error_handling = None
else:
return None
else: else:
error_handling = 'strict' error_handling = 'strict'
error_handling_node = null_node error_handling_node = null_node
......
...@@ -13,61 +13,6 @@ def slice_charptr_end(): ...@@ -13,61 +13,6 @@ def slice_charptr_end():
""" """
return cstring[:1], cstring[:3], cstring[:9] return cstring[:1], cstring[:3], cstring[:9]
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode():
"""
>>> print(str(slice_charptr_decode()).replace("u'", "'"))
('a', 'abc', 'abcABCqtp')
"""
return (cstring[:1].decode('UTF-8'),
cstring[:3].decode('UTF-8'),
cstring[:9].decode('UTF-8'))
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_slice2():
"""
>>> print(str(slice_charptr_decode_slice2()).replace("u'", "'"))
('a', 'bc', 'tp')
"""
return (cstring[0:1].decode('UTF-8'),
cstring[1:3].decode('UTF-8'),
cstring[7:9].decode('UTF-8'))
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_strlen():
"""
>>> print(str(slice_charptr_decode_strlen()).replace("u'", "'"))
('abcABCqtp', 'bcABCqtp', '')
"""
return (cstring.decode('UTF-8'),
cstring[1:].decode('UTF-8'),
cstring[9:].decode('UTF-8'))
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_unbound():
"""
>>> print(str(slice_charptr_decode_unbound()).replace("u'", "'"))
('a', 'abc', 'abcABCqtp')
"""
return (bytes.decode(cstring[:1], 'UTF-8'),
bytes.decode(cstring[:3], 'UTF-8', 'replace'),
bytes.decode(cstring[:9], 'UTF-8'))
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_errormode():
"""
>>> print(str(slice_charptr_decode_errormode()).replace("u'", "'"))
('a', 'abc', 'abcABCqtp')
"""
return (cstring[:1].decode('UTF-8', 'strict'),
cstring[:3].decode('UTF-8', 'replace'),
cstring[:9].decode('UTF-8', 'unicode_escape'))
@cython.test_assert_path_exists("//ForFromStatNode", @cython.test_assert_path_exists("//ForFromStatNode",
"//ForFromStatNode//SliceIndexNode") "//ForFromStatNode//SliceIndexNode")
@cython.test_fail_if_path_exists("//ForInStatNode") @cython.test_fail_if_path_exists("//ForInStatNode")
...@@ -117,7 +62,7 @@ def slice_charptr_for_loop_c(): ...@@ -117,7 +62,7 @@ def slice_charptr_for_loop_c():
@cython.test_fail_if_path_exists("//ForInStatNode") @cython.test_fail_if_path_exists("//ForInStatNode")
def slice_charptr_for_loop_c_dynamic_bounds(): def slice_charptr_for_loop_c_dynamic_bounds():
""" """
>>> slice_charptr_for_loop_c() >>> slice_charptr_for_loop_c_dynamic_bounds()
['a', 'b', 'c'] ['a', 'b', 'c']
['b', 'c', 'A', 'B'] ['b', 'c', 'A', 'B']
['B', 'C', 'q', 't', 'p'] ['B', 'C', 'q', 't', 'p']
......
cimport cython
############################################################
# tests for decoding char* values and char* slices
cdef char* cstring = "abcABCqtp"
# Tree assertions: the compiled function must contain a PythonCapiCallNode
# and must not contain any AttributeNode (presumably meaning the .decode()
# calls were replaced by direct C-API calls -- confirm against the optimiser).
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode():
    """
    >>> print(str(slice_charptr_decode()).replace("u'", "'"))
    ('a', 'abc', 'abcABCqtp')
    """
    # Prefix slices of the module-level char* with a literal stop index and
    # a literal encoding name.
    return (cstring[:1].decode('UTF-8'),
            cstring[:3].decode('UTF-8'),
            cstring[:9].decode('UTF-8'))
# Tree assertions: a PythonCapiCallNode must be present and no AttributeNode
# may remain in the compiled function.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_unknown_encoding():
    """
    >>> print(str(slice_charptr_decode_unknown_encoding()).replace("u'", "'"))
    ('abcABCqtp', 'abcABCqtp', 'abc', 'abcABCqt')
    """
    # The encoding and the error mode are passed as char* variables instead
    # of string literals, so their values are not literal arguments of the
    # .decode() calls below.
    cdef char* enc = 'UTF-8'
    cdef char* error_handling = 'strict'
    # Covers the whole string and prefix slices, each with and without an
    # explicit error-handling argument.
    return (cstring.decode(enc),
            cstring.decode(enc, error_handling),
            cstring[:3].decode(enc),
            cstring[:8].decode(enc, error_handling))
# Tree assertions: a PythonCapiCallNode must be present and no AttributeNode
# may remain in the compiled function.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_slice2():
    """
    >>> print(str(slice_charptr_decode_slice2()).replace("u'", "'"))
    ('a', 'bc', 'tp')
    """
    # Slices with both a literal start and a literal stop index: at the
    # beginning, in the middle and at the end of the 9-character string.
    return (cstring[0:1].decode('UTF-8'),
            cstring[1:3].decode('UTF-8'),
            cstring[7:9].decode('UTF-8'))
# Tree assertions: a PythonCapiCallNode must be present and no AttributeNode
# may remain in the compiled function.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_strlen():
    """
    >>> print(str(slice_charptr_decode_strlen()).replace("u'", "'"))
    ('abcABCqtp', 'bcABCqtp', '')
    """
    # None of these expressions gives a stop index, so the end of the data
    # has to be determined at runtime (presumably via strlen(), going by the
    # test name -- confirm against the optimiser). The last case starts at
    # the end of the string and yields an empty result.
    return (cstring.decode('UTF-8'),
            cstring[1:].decode('UTF-8'),
            cstring[9:].decode('UTF-8'))
# Tree assertions: a PythonCapiCallNode must be present and no AttributeNode
# may remain in the compiled function.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_unbound():
    """
    >>> print(str(slice_charptr_decode_unbound()).replace("u'", "'"))
    ('a', 'abc', 'abcABCqtp')
    """
    # Same decoding as the method form, but spelled as a call to the unbound
    # method bytes.decode() with the char* slice as first argument; the second
    # case additionally passes an explicit error mode.
    return (bytes.decode(cstring[:1], 'UTF-8'),
            bytes.decode(cstring[:3], 'UTF-8', 'replace'),
            bytes.decode(cstring[:9], 'UTF-8'))
# Tree assertions: a PythonCapiCallNode must be present and no AttributeNode
# may remain in the compiled function.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_errormode():
    """
    >>> print(str(slice_charptr_decode_errormode()).replace("u'", "'"))
    ('a', 'abc', 'abcABCqtp')
    """
    # Each call passes a different literal as the error-handling argument.
    # NOTE(review): 'unicode_escape' is the name of a codec, not one of the
    # standard error handlers; the expected output suggests the handler is
    # never consulted for this pure-ASCII input -- confirm this is intended.
    return (cstring[:1].decode('UTF-8', 'strict'),
            cstring[:3].decode('UTF-8', 'replace'),
            cstring[:9].decode('UTF-8', 'unicode_escape'))
# Tree assertions: a PythonCapiCallNode must be present and no AttributeNode
# may remain in the compiled function.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_dynamic_bounds():
    """
    >>> print(str(slice_charptr_dynamic_bounds()).replace("u'", "'"))
    ('abc', 'abc', 'bcAB', 'BCqtp')
    """
    # The slice bounds are results of function calls rather than integer
    # literals, so each bound expression is a call that must be evaluated
    # when the slice is taken.
    return (cstring[:return3()].decode('UTF-8'),
            cstring[0:return3()].decode('UTF-8'),
            cstring[return1():return5()].decode('UTF-8'),
            cstring[return4():return9()].decode('UTF-8'))
# Helper functions that each return a fixed integer; they are used as slice
# bounds so that the bounds are call expressions instead of literals.
# No C return type is declared on any of them.
cdef return1(): return 1
cdef return3(): return 3
cdef return4(): return 4
cdef return5(): return 5
cdef return9(): return 9
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment