Commit 8f88561f authored by Stefan Behnel's avatar Stefan Behnel

optimise unicode.startswith() and unicode.endswith(), drop redundant bint->bool->bint coercions

parent a337a45f
......@@ -414,7 +414,7 @@ def init_builtins():
init_builtin_types()
init_builtin_structs()
global list_type, tuple_type, dict_type, set_type, type_type
global bytes_type, str_type, unicode_type, float_type
global bytes_type, str_type, unicode_type, float_type, bool_type
type_type = builtin_scope.lookup('type').type
list_type = builtin_scope.lookup('list').type
tuple_type = builtin_scope.lookup('tuple').type
......@@ -424,5 +424,6 @@ def init_builtins():
str_type = builtin_scope.lookup('str').type
unicode_type = builtin_scope.lookup('unicode').type
float_type = builtin_scope.lookup('float').type
bool_type = builtin_scope.lookup('bool').type
init_builtins()
......@@ -937,6 +937,16 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
return node
return node.arg
def visit_CoerceToBooleanNode(self, node):
"""Drop redundant conversion nodes after tree changes.
"""
self.visitchildren(node)
arg = node.arg
if isinstance(arg, ExprNodes.CoerceToPyTypeNode):
if arg.type in (PyrexTypes.py_object_type, Builtin.bool_type):
return arg.arg.coerce_to_boolean(self.env_stack[-1])
return node
def visit_CoerceFromPyTypeNode(self, node):
"""Drop redundant conversion nodes after tree changes.
......@@ -1437,8 +1447,7 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
if len(args) < 2:
args.append(ExprNodes.BoolNode(node.pos, value=False))
else:
args[1] = args[1].coerce_to(PyrexTypes.c_bint_type,
self.env_stack[-1])
args[1] = args[1].coerce_to_boolean(self.env_stack[-1])
return self._substitute_method_call(
node, "PyUnicode_Splitlines", self.PyUnicode_Splitlines_func_type,
......@@ -1490,6 +1499,54 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
node, "PyUnicode_Split", self.PyUnicode_Split_func_type,
'split', is_unbound_method, args)
PyUnicode_Tailmatch_func_type = PyrexTypes.CFuncType(
PyrexTypes.c_bint_type, [
PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None),
PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("end", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("direction", PyrexTypes.c_int_type, None),
],
exception_value = '-1')
def _handle_simple_method_unicode_endswith(self, node, args, is_unbound_method):
return self._inject_unicode_tailmatch(
node, args, is_unbound_method, 'endswith', +1)
def _handle_simple_method_unicode_startswith(self, node, args, is_unbound_method):
return self._inject_unicode_tailmatch(
node, args, is_unbound_method, 'startswith', -1)
def _inject_unicode_tailmatch(self, node, args, is_unbound_method,
method_name, direction):
"""Replace unicode.startswith(...) and unicode.endswith(...)
by a direct call to the corresponding C-API function.
"""
if len(args) not in (2,3,4):
self._error_wrong_arg_count('unicode.%s' % method_name, node, args, "2-4")
return node
if len(args) < 3:
args.append(ExprNodes.IntNode(
node.pos, value="0", type=PyrexTypes.c_py_ssize_t_type))
else:
args[2] = args[2].coerce_to(PyrexTypes.c_py_ssize_t_type,
self.env_stack[-1])
if len(args) < 4:
args.append(ExprNodes.IntNode(
node.pos, value="PY_SSIZE_T_MAX", type=PyrexTypes.c_py_ssize_t_type))
else:
args[3] = args[3].coerce_to(PyrexTypes.c_py_ssize_t_type,
self.env_stack[-1])
args.append(ExprNodes.IntNode(
node.pos, value=str(direction), type=PyrexTypes.c_int_type))
method_call = self._substitute_method_call(
node, "__Pyx_PyUnicode_Tailmatch", self.PyUnicode_Tailmatch_func_type,
method_name, is_unbound_method, args,
utility_code = unicode_tailmatch_utility_code)
return ExprNodes.CoerceToPyTypeNode(
method_call, self.env_stack[-1], Builtin.bool_type)
PyUnicode_AsEncodedString_func_type = PyrexTypes.CFuncType(
Builtin.bytes_type, [
PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None),
......@@ -1740,6 +1797,34 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
)
unicode_tailmatch_utility_code = UtilityCode(
# Python's unicode.startswith() and unicode.endswith() support a
# tuple of prefixes/suffixes, whereas it's much more common to
# test for a single unicode string.
proto = '''
static int __Pyx_PyUnicode_Tailmatch(PyObject* s, PyObject* substr, \
Py_ssize_t start, Py_ssize_t end, int direction);
''',
impl = '''
static int __Pyx_PyUnicode_Tailmatch(PyObject* s, PyObject* substr,
Py_ssize_t start, Py_ssize_t end, int direction) {
if (unlikely(PyTuple_Check(substr))) {
int result;
Py_ssize_t i;
for (i = 0; i < PyTuple_GET_SIZE(substr); i++) {
result = PyUnicode_Tailmatch(s, PyTuple_GET_ITEM(substr, i),
start, end, direction);
if (result) {
return result;
}
}
return 0;
}
return PyUnicode_Tailmatch(s, substr, start, end, direction);
}
''',
)
dict_getitem_default_utility_code = UtilityCode(
proto = '''
static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value) {
......
......@@ -136,8 +136,7 @@ def splitlines_keep(unicode s, keep):
return s.splitlines(keep)
@cython.test_fail_if_path_exists(
# boolean conversion isn't currently smart enough for this ...
# "//CoerceToPyTypeNode", "//CoerceFromPyTypeNode",
"//CoerceToPyTypeNode", "//CoerceFromPyTypeNode",
"//CastNode", "//TypecastNode")
@cython.test_assert_path_exists(
"//PythonCapiCallNode")
......@@ -208,3 +207,124 @@ def join_sep(l):
ab|jd|sdflk|as|sa|sadas|asdas|fsdf
"""
return u'|'.join(l)
# unicode.startswith(s, prefix, [start, [end]])
@cython.test_fail_if_path_exists(
"//CoerceToPyTypeNode",
"//CoerceFromPyTypeNode",
"//CastNode", "//TypecastNode")
@cython.test_assert_path_exists(
"//PythonCapiCallNode")
def startswith(unicode s, sub):
"""
>>> text.startswith('ab ')
True
>>> startswith(text, 'ab ')
'MATCH'
>>> text.startswith('ab X')
False
>>> startswith(text, 'ab X')
'NO MATCH'
>>> text.startswith(('ab', 'ab '))
True
>>> startswith(text, ('ab', 'ab '))
'MATCH'
>>> text.startswith((' ab', 'ab X'))
False
>>> startswith(text, (' ab', 'ab X'))
'NO MATCH'
"""
if s.startswith(sub):
return 'MATCH'
else:
return 'NO MATCH'
@cython.test_fail_if_path_exists(
"//CoerceToPyTypeNode",
"//CastNode", "//TypecastNode")
@cython.test_assert_path_exists(
"//CoerceFromPyTypeNode",
"//PythonCapiCallNode")
def startswith_start_end(unicode s, sub, start, end):
"""
>>> text.startswith('b ', 1, 5)
True
>>> startswith_start_end(text, 'b ', 1, 5)
'MATCH'
>>> text.startswith('b X', 1, 5)
False
>>> startswith_start_end(text, 'b X', 1, 5)
'NO MATCH'
"""
if s.startswith(sub, start, end):
return 'MATCH'
else:
return 'NO MATCH'
# unicode.endswith(s, prefix, [start, [end]])
@cython.test_fail_if_path_exists(
"//CoerceToPyTypeNode",
"//CoerceFromPyTypeNode",
"//CastNode", "//TypecastNode")
@cython.test_assert_path_exists(
"//PythonCapiCallNode")
def endswith(unicode s, sub):
"""
>>> text.endswith('fsdf ')
True
>>> endswith(text, 'fsdf ')
'MATCH'
>>> text.endswith('fsdf X')
False
>>> endswith(text, 'fsdf X')
'NO MATCH'
>>> text.endswith(('fsdf', 'fsdf '))
True
>>> endswith(text, ('fsdf', 'fsdf '))
'MATCH'
>>> text.endswith(('fsdf', 'fsdf X'))
False
>>> endswith(text, ('fsdf', 'fsdf X'))
'NO MATCH'
"""
if s.endswith(sub):
return 'MATCH'
else:
return 'NO MATCH'
@cython.test_fail_if_path_exists(
"//CoerceToPyTypeNode",
"//CastNode", "//TypecastNode")
@cython.test_assert_path_exists(
"//CoerceFromPyTypeNode",
"//PythonCapiCallNode")
def endswith_start_end(unicode s, sub, start, end):
"""
>>> text.endswith('fsdf', 10, len(text)-1)
True
>>> endswith_start_end(text, 'fsdf', 10, len(text)-1)
'MATCH'
>>> text.endswith('fsdf ', 10, len(text)-1)
False
>>> endswith_start_end(text, 'fsdf ', 10, len(text)-1)
'NO MATCH'
>>> text.endswith(('fsd', 'fsdf'), 10, len(text)-1)
True
>>> endswith_start_end(text, ('fsd', 'fsdf'), 10, len(text)-1)
'MATCH'
>>> text.endswith(('fsdf ', 'fsdf X'), 10, len(text)-1)
False
>>> endswith_start_end(text, ('fsdf ', 'fsdf X'), 10, len(text)-1)
'NO MATCH'
"""
if s.endswith(sub, start, end):
return 'MATCH'
else:
return 'NO MATCH'
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment