Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
nexedi
cython
Commits
8f88561f
Commit
8f88561f
authored
Mar 21, 2010
by
Stefan Behnel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimise unicode.startswith() and unicode.endswith(), drop redundant bint->bool->bint coercions
parent
a337a45f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
211 additions
and
5 deletions
+211
-5
Cython/Compiler/Builtin.py
Cython/Compiler/Builtin.py
+2
-1
Cython/Compiler/Optimize.py
Cython/Compiler/Optimize.py
+87
-2
tests/run/unicodemethods.pyx
tests/run/unicodemethods.pyx
+122
-2
No files found.
Cython/Compiler/Builtin.py
View file @
8f88561f
...
...
@@ -414,7 +414,7 @@ def init_builtins():
init_builtin_types
()
init_builtin_structs
()
global
list_type
,
tuple_type
,
dict_type
,
set_type
,
type_type
global
bytes_type
,
str_type
,
unicode_type
,
float_type
global
bytes_type
,
str_type
,
unicode_type
,
float_type
,
bool_type
type_type
=
builtin_scope
.
lookup
(
'type'
).
type
list_type
=
builtin_scope
.
lookup
(
'list'
).
type
tuple_type
=
builtin_scope
.
lookup
(
'tuple'
).
type
...
...
@@ -424,5 +424,6 @@ def init_builtins():
str_type
=
builtin_scope
.
lookup
(
'str'
).
type
unicode_type
=
builtin_scope
.
lookup
(
'unicode'
).
type
float_type
=
builtin_scope
.
lookup
(
'float'
).
type
bool_type
=
builtin_scope
.
lookup
(
'bool'
).
type
init_builtins
()
Cython/Compiler/Optimize.py
View file @
8f88561f
...
...
@@ -937,6 +937,16 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
return
node
return
node
.
arg
def
visit_CoerceToBooleanNode
(
self
,
node
):
"""Drop redundant conversion nodes after tree changes.
"""
self
.
visitchildren
(
node
)
arg
=
node
.
arg
if
isinstance
(
arg
,
ExprNodes
.
CoerceToPyTypeNode
):
if
arg
.
type
in
(
PyrexTypes
.
py_object_type
,
Builtin
.
bool_type
):
return
arg
.
arg
.
coerce_to_boolean
(
self
.
env_stack
[
-
1
])
return
node
def
visit_CoerceFromPyTypeNode
(
self
,
node
):
"""Drop redundant conversion nodes after tree changes.
...
...
@@ -1437,8 +1447,7 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
if
len
(
args
)
<
2
:
args
.
append
(
ExprNodes
.
BoolNode
(
node
.
pos
,
value
=
False
))
else
:
args
[
1
]
=
args
[
1
].
coerce_to
(
PyrexTypes
.
c_bint_type
,
self
.
env_stack
[
-
1
])
args
[
1
]
=
args
[
1
].
coerce_to_boolean
(
self
.
env_stack
[
-
1
])
return
self
.
_substitute_method_call
(
node
,
"PyUnicode_Splitlines"
,
self
.
PyUnicode_Splitlines_func_type
,
...
...
@@ -1490,6 +1499,54 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
node
,
"PyUnicode_Split"
,
self
.
PyUnicode_Split_func_type
,
'split'
,
is_unbound_method
,
args
)
PyUnicode_Tailmatch_func_type
=
PyrexTypes
.
CFuncType
(
PyrexTypes
.
c_bint_type
,
[
PyrexTypes
.
CFuncTypeArg
(
"str"
,
Builtin
.
unicode_type
,
None
),
PyrexTypes
.
CFuncTypeArg
(
"substring"
,
PyrexTypes
.
py_object_type
,
None
),
PyrexTypes
.
CFuncTypeArg
(
"start"
,
PyrexTypes
.
c_py_ssize_t_type
,
None
),
PyrexTypes
.
CFuncTypeArg
(
"end"
,
PyrexTypes
.
c_py_ssize_t_type
,
None
),
PyrexTypes
.
CFuncTypeArg
(
"direction"
,
PyrexTypes
.
c_int_type
,
None
),
],
exception_value
=
'-1'
)
def
_handle_simple_method_unicode_endswith
(
self
,
node
,
args
,
is_unbound_method
):
return
self
.
_inject_unicode_tailmatch
(
node
,
args
,
is_unbound_method
,
'endswith'
,
+
1
)
def
_handle_simple_method_unicode_startswith
(
self
,
node
,
args
,
is_unbound_method
):
return
self
.
_inject_unicode_tailmatch
(
node
,
args
,
is_unbound_method
,
'startswith'
,
-
1
)
def
_inject_unicode_tailmatch
(
self
,
node
,
args
,
is_unbound_method
,
method_name
,
direction
):
"""Replace unicode.startswith(...) and unicode.endswith(...)
by a direct call to the corresponding C-API function.
"""
if
len
(
args
)
not
in
(
2
,
3
,
4
):
self
.
_error_wrong_arg_count
(
'unicode.%s'
%
method_name
,
node
,
args
,
"2-4"
)
return
node
if
len
(
args
)
<
3
:
args
.
append
(
ExprNodes
.
IntNode
(
node
.
pos
,
value
=
"0"
,
type
=
PyrexTypes
.
c_py_ssize_t_type
))
else
:
args
[
2
]
=
args
[
2
].
coerce_to
(
PyrexTypes
.
c_py_ssize_t_type
,
self
.
env_stack
[
-
1
])
if
len
(
args
)
<
4
:
args
.
append
(
ExprNodes
.
IntNode
(
node
.
pos
,
value
=
"PY_SSIZE_T_MAX"
,
type
=
PyrexTypes
.
c_py_ssize_t_type
))
else
:
args
[
3
]
=
args
[
3
].
coerce_to
(
PyrexTypes
.
c_py_ssize_t_type
,
self
.
env_stack
[
-
1
])
args
.
append
(
ExprNodes
.
IntNode
(
node
.
pos
,
value
=
str
(
direction
),
type
=
PyrexTypes
.
c_int_type
))
method_call
=
self
.
_substitute_method_call
(
node
,
"__Pyx_PyUnicode_Tailmatch"
,
self
.
PyUnicode_Tailmatch_func_type
,
method_name
,
is_unbound_method
,
args
,
utility_code
=
unicode_tailmatch_utility_code
)
return
ExprNodes
.
CoerceToPyTypeNode
(
method_call
,
self
.
env_stack
[
-
1
],
Builtin
.
bool_type
)
PyUnicode_AsEncodedString_func_type
=
PyrexTypes
.
CFuncType
(
Builtin
.
bytes_type
,
[
PyrexTypes
.
CFuncTypeArg
(
"obj"
,
Builtin
.
unicode_type
,
None
),
...
...
@@ -1740,6 +1797,34 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
)
unicode_tailmatch_utility_code
=
UtilityCode
(
# Python's unicode.startswith() and unicode.endswith() support a
# tuple of prefixes/suffixes, whereas it's much more common to
# test for a single unicode string.
proto
=
'''
static int __Pyx_PyUnicode_Tailmatch(PyObject* s, PyObject* substr,
\
Py_ssize_t start, Py_ssize_t end, int direction);
'''
,
impl
=
'''
static int __Pyx_PyUnicode_Tailmatch(PyObject* s, PyObject* substr,
Py_ssize_t start, Py_ssize_t end, int direction) {
if (unlikely(PyTuple_Check(substr))) {
int result;
Py_ssize_t i;
for (i = 0; i < PyTuple_GET_SIZE(substr); i++) {
result = PyUnicode_Tailmatch(s, PyTuple_GET_ITEM(substr, i),
start, end, direction);
if (result) {
return result;
}
}
return 0;
}
return PyUnicode_Tailmatch(s, substr, start, end, direction);
}
'''
,
)
dict_getitem_default_utility_code
=
UtilityCode
(
proto
=
'''
static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value) {
...
...
tests/run/unicodemethods.pyx
View file @
8f88561f
...
...
@@ -136,8 +136,7 @@ def splitlines_keep(unicode s, keep):
return
s
.
splitlines
(
keep
)
@
cython
.
test_fail_if_path_exists
(
# boolean conversion isn't currently smart enough for this ...
# "//CoerceToPyTypeNode", "//CoerceFromPyTypeNode",
"//CoerceToPyTypeNode"
,
"//CoerceFromPyTypeNode"
,
"//CastNode"
,
"//TypecastNode"
)
@
cython
.
test_assert_path_exists
(
"//PythonCapiCallNode"
)
...
...
@@ -208,3 +207,124 @@ def join_sep(l):
ab|jd|sdflk|as|sa|sadas|asdas|fsdf
"""
return
u'|'
.
join
(
l
)
# unicode.startswith(s, prefix, [start, [end]])
@
cython
.
test_fail_if_path_exists
(
"//CoerceToPyTypeNode"
,
"//CoerceFromPyTypeNode"
,
"//CastNode"
,
"//TypecastNode"
)
@
cython
.
test_assert_path_exists
(
"//PythonCapiCallNode"
)
def
startswith
(
unicode
s
,
sub
):
"""
>>> text.startswith('ab ')
True
>>> startswith(text, 'ab ')
'MATCH'
>>> text.startswith('ab X')
False
>>> startswith(text, 'ab X')
'NO MATCH'
>>> text.startswith(('ab', 'ab '))
True
>>> startswith(text, ('ab', 'ab '))
'MATCH'
>>> text.startswith((' ab', 'ab X'))
False
>>> startswith(text, (' ab', 'ab X'))
'NO MATCH'
"""
if
s
.
startswith
(
sub
):
return
'MATCH'
else
:
return
'NO MATCH'
@
cython
.
test_fail_if_path_exists
(
"//CoerceToPyTypeNode"
,
"//CastNode"
,
"//TypecastNode"
)
@
cython
.
test_assert_path_exists
(
"//CoerceFromPyTypeNode"
,
"//PythonCapiCallNode"
)
def
startswith_start_end
(
unicode
s
,
sub
,
start
,
end
):
"""
>>> text.startswith('b ', 1, 5)
True
>>> startswith_start_end(text, 'b ', 1, 5)
'MATCH'
>>> text.startswith('b X', 1, 5)
False
>>> startswith_start_end(text, 'b X', 1, 5)
'NO MATCH'
"""
if
s
.
startswith
(
sub
,
start
,
end
):
return
'MATCH'
else
:
return
'NO MATCH'
# unicode.endswith(s, prefix, [start, [end]])
@
cython
.
test_fail_if_path_exists
(
"//CoerceToPyTypeNode"
,
"//CoerceFromPyTypeNode"
,
"//CastNode"
,
"//TypecastNode"
)
@
cython
.
test_assert_path_exists
(
"//PythonCapiCallNode"
)
def
endswith
(
unicode
s
,
sub
):
"""
>>> text.endswith('fsdf ')
True
>>> endswith(text, 'fsdf ')
'MATCH'
>>> text.endswith('fsdf X')
False
>>> endswith(text, 'fsdf X')
'NO MATCH'
>>> text.endswith(('fsdf', 'fsdf '))
True
>>> endswith(text, ('fsdf', 'fsdf '))
'MATCH'
>>> text.endswith(('fsdf', 'fsdf X'))
False
>>> endswith(text, ('fsdf', 'fsdf X'))
'NO MATCH'
"""
if
s
.
endswith
(
sub
):
return
'MATCH'
else
:
return
'NO MATCH'
@
cython
.
test_fail_if_path_exists
(
"//CoerceToPyTypeNode"
,
"//CastNode"
,
"//TypecastNode"
)
@
cython
.
test_assert_path_exists
(
"//CoerceFromPyTypeNode"
,
"//PythonCapiCallNode"
)
def
endswith_start_end
(
unicode
s
,
sub
,
start
,
end
):
"""
>>> text.endswith('fsdf', 10, len(text)-1)
True
>>> endswith_start_end(text, 'fsdf', 10, len(text)-1)
'MATCH'
>>> text.endswith('fsdf ', 10, len(text)-1)
False
>>> endswith_start_end(text, 'fsdf ', 10, len(text)-1)
'NO MATCH'
>>> text.endswith(('fsd', 'fsdf'), 10, len(text)-1)
True
>>> endswith_start_end(text, ('fsd', 'fsdf'), 10, len(text)-1)
'MATCH'
>>> text.endswith(('fsdf ', 'fsdf X'), 10, len(text)-1)
False
>>> endswith_start_end(text, ('fsdf ', 'fsdf X'), 10, len(text)-1)
'NO MATCH'
"""
if
s
.
endswith
(
sub
,
start
,
end
):
return
'MATCH'
else
:
return
'NO MATCH'
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment