Commit 7245498b authored by Stefan Behnel

support "c_string.decode(enc)" and "c_string[x:].decode(enc)" efficiently

parent 468648c9
...@@ -1297,23 +1297,59 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform): ...@@ -1297,23 +1297,59 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
if len(args) < 1 or len(args) > 3: if len(args) < 1 or len(args) > 3:
self._error_wrong_arg_count('bytes.decode', node, args, '1-3') self._error_wrong_arg_count('bytes.decode', node, args, '1-3')
return node return node
if not isinstance(args[0], ExprNodes.SliceIndexNode): temps = []
# we need the string length as a slice end index if isinstance(args[0], ExprNodes.SliceIndexNode):
return node index_node = args[0]
index_node = args[0] string_node = index_node.base
string_node = index_node.base if not string_node.type.is_string:
if not string_node.type.is_string: # nothing to optimise here
# nothing to optimise here return node
start, stop = index_node.start, index_node.stop
if not start or start.constant_result == 0:
start = None
else:
if start.type.is_pyobject:
start = start.coerce_to(PyrexTypes.c_py_ssize_t_type, self.env_stack[-1])
if not start.is_simple:
start = UtilNodes.LetRefNode(start)
temps.append(start)
string_node = ExprNodes.AddNode(pos=start.pos,
operand1=string_node,
operator='+',
operand2=start,
is_temp=False,
type=string_node.type
)
if stop and stop.type.is_pyobject:
stop = stop.coerce_to(PyrexTypes.c_py_ssize_t_type, self.env_stack[-1])
elif isinstance(args[0], ExprNodes.CoerceToPyTypeNode) \
and args[0].arg.type.is_string:
# use strlen() to find the string length, just as CPython would
start = stop = None
string_node = args[0].arg
else:
# let Python do its job
return node return node
start, stop = index_node.start, index_node.stop
if not stop: if not stop:
# FIXME: could use strlen() - although Python will do that anyway ... if start or not string_node.is_simple:
return node string_node = UtilNodes.LetRefNode(string_node)
if stop.type.is_pyobject: temps.append(string_node)
stop = stop.coerce_to(PyrexTypes.c_py_ssize_t_type, self.env_stack[-1]) stop = ExprNodes.PythonCapiCallNode(
if start and start.constant_result != 0: string_node.pos, "strlen", self.Pyx_strlen_func_type,
# FIXME: put start into a temp and do the math args = [string_node],
return node is_temp = False,
utility_code = include_string_h_utility_code,
).coerce_to(PyrexTypes.c_py_ssize_t_type, self.env_stack[-1])
elif start:
stop = ExprNodes.SubNode(
pos = stop.pos,
operand1 = stop,
operator = '-',
operand2 = start,
is_temp = False,
type = PyrexTypes.c_py_ssize_t_type
)
parameters = self._unpack_encoding_and_error_mode(node.pos, args) parameters = self._unpack_encoding_and_error_mode(node.pos, args)
if parameters is None: if parameters is None:
...@@ -1324,19 +1360,23 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform): ...@@ -1324,19 +1360,23 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
codec_name = self._find_special_codec_name(encoding) codec_name = self._find_special_codec_name(encoding)
if codec_name is not None: if codec_name is not None:
decode_function = "PyUnicode_Decode%s" % codec_name decode_function = "PyUnicode_Decode%s" % codec_name
return ExprNodes.PythonCapiCallNode( node = ExprNodes.PythonCapiCallNode(
node.pos, decode_function, node.pos, decode_function,
self.PyUnicode_DecodeXyz_func_type, self.PyUnicode_DecodeXyz_func_type,
args = [string_node, stop, error_handling_node], args = [string_node, stop, error_handling_node],
is_temp = node.is_temp, is_temp = node.is_temp,
) )
else:
node = ExprNodes.PythonCapiCallNode(
node.pos, "PyUnicode_Decode",
self.PyUnicode_Decode_func_type,
args = [string_node, stop, encoding_node, error_handling_node],
is_temp = node.is_temp,
)
return ExprNodes.PythonCapiCallNode( for temp in temps[::-1]:
node.pos, "PyUnicode_Decode", node = UtilNodes.EvalWithTempExprNode(temp, node)
self.PyUnicode_Decode_func_type, return node
args = [string_node, stop, encoding_node, error_handling_node],
is_temp = node.is_temp,
)
def _find_special_codec_name(self, encoding): def _find_special_codec_name(self, encoding):
try: try:
......
...@@ -24,6 +24,28 @@ def slice_charptr_decode(): ...@@ -24,6 +24,28 @@ def slice_charptr_decode():
cstring[:3].decode('UTF-8'), cstring[:3].decode('UTF-8'),
cstring[:9].decode('UTF-8')) cstring[:9].decode('UTF-8'))
# Compile-time tree checks for char* slices that have BOTH a start and a stop
# index, followed by .decode(): the first decorator requires a
# PythonCapiCallNode somewhere in the function's tree, the second fails the
# test if any AttributeNode is present.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_slice2():
    """
    >>> print(str(slice_charptr_decode_slice2()).replace("u'", "'"))
    ('a', 'bc', 'tp')
    """
    # The doctest above rewrites "u'" to "'" in the printed tuple so that the
    # expected output uses plain quotes (presumably to match Python 2 unicode
    # reprs as well -- TODO confirm).
    # NOTE(review): `cstring` is defined outside this view; judging by the
    # expected doctest outputs it is presumably a C string holding
    # 'abcABCqtp' -- confirm against the top of the test module.
    return (cstring[0:1].decode('UTF-8'),  # explicit start of 0
            cstring[1:3].decode('UTF-8'),  # non-zero start, stop inside the string
            cstring[7:9].decode('UTF-8'))  # non-zero start, later slice
# Compile-time tree checks for .decode() on a C string when NO stop index is
# given (plain `cstring` and `cstring[x:]`): the first decorator requires a
# PythonCapiCallNode somewhere in the function's tree, the second fails the
# test if any AttributeNode is present.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_strlen():
    """
    >>> print(str(slice_charptr_decode_strlen()).replace("u'", "'"))
    ('abcABCqtp', 'bcABCqtp', '')
    """
    # The doctest above rewrites "u'" to "'" in the printed tuple so that the
    # expected output uses plain quotes (presumably to match Python 2 unicode
    # reprs as well -- TODO confirm).
    # NOTE(review): `cstring` is defined outside this view; judging by the
    # expected doctest outputs it is presumably a C string holding
    # 'abcABCqtp' -- confirm against the top of the test module.
    return (cstring.decode('UTF-8'),      # whole string, no slice at all
            cstring[1:].decode('UTF-8'),  # start only; decodes to the end of the string
            cstring[9:].decode('UTF-8'))  # start index 9 -> expected result is ''
@cython.test_assert_path_exists("//PythonCapiCallNode") @cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode") @cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_unbound(): def slice_charptr_decode_unbound():
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment