Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
nexedi
cython
Commits
9cb1c32f
Commit
9cb1c32f
authored
May 16, 2010
by
Stefan Behnel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimise predicates and methods for single unicode characters
parent
850c831d
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
122 additions
and
1 deletion
+122
-1
Cython/Compiler/ExprNodes.py
Cython/Compiler/ExprNodes.py
+3
-1
Cython/Compiler/Optimize.py
Cython/Compiler/Optimize.py
+77
-0
tests/run/py_unicode_type.pyx
tests/run/py_unicode_type.pyx
+42
-0
No files found.
Cython/Compiler/ExprNodes.py
View file @
9cb1c32f
...
...
@@ -6217,7 +6217,9 @@ class CoerceToPyTypeNode(CoercionNode):
if
type
is
not
py_object_type
:
self
.
type
=
py_object_type
elif
arg
.
type
.
is_string
:
self
.
type
=
Builtin
.
bytes_type
self
.
type
=
bytes_type
elif
arg
.
type
is
PyrexTypes
.
c_py_unicode_type
:
self
.
type
=
unicode_type
gil_message
=
"Converting to Python object"
...
...
Cython/Compiler/Optimize.py
View file @
9cb1c32f
...
...
@@ -1736,6 +1736,71 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
### unicode type methods
# C function signature used for the single-character predicate calls
# injected by _inject_unicode_predicate():
#     bint <func>(Py_UNICODE uchar)
PyUnicode_uchar_predicate_func_type = PyrexTypes.CFuncType(
    PyrexTypes.c_bint_type, [
        PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_unicode_type, None),
        ])
def
_inject_unicode_predicate
(
self
,
node
,
args
,
is_unbound_method
):
if
is_unbound_method
or
len
(
args
)
!=
1
:
return
node
ustring
=
args
[
0
]
if
not
isinstance
(
ustring
,
ExprNodes
.
CoerceToPyTypeNode
)
or
\
ustring
.
arg
.
type
is
not
PyrexTypes
.
c_py_unicode_type
:
return
node
uchar
=
ustring
.
arg
method_name
=
node
.
function
.
attribute
if
method_name
==
'istitle'
:
# istitle() doesn't directly map to Py_UNICODE_ISTITLE()
utility_code
=
py_unicode_istitle_utility_code
function_name
=
'__Pyx_Py_UNICODE_ISTITLE'
else
:
utility_code
=
None
function_name
=
'Py_UNICODE_%s'
%
method_name
.
upper
()
func_call
=
self
.
_substitute_method_call
(
node
,
function_name
,
self
.
PyUnicode_uchar_predicate_func_type
,
method_name
,
is_unbound_method
,
[
uchar
],
utility_code
=
utility_code
)
if
node
.
type
.
is_pyobject
:
func_call
=
func_call
.
coerce_to_pyobject
(
self
.
current_env
)
return
func_call
# All single-character predicates share one implementation; the method
# name is read from the call node itself, so the same function can be
# bound under each handler name.
# NOTE(review): presumably these names are looked up by the method call
# dispatcher of this transform (not visible here) - confirm.
_handle_simple_method_unicode_isalnum = _inject_unicode_predicate
_handle_simple_method_unicode_isalpha = _inject_unicode_predicate
_handle_simple_method_unicode_isdecimal = _inject_unicode_predicate
_handle_simple_method_unicode_isdigit = _inject_unicode_predicate
_handle_simple_method_unicode_islower = _inject_unicode_predicate
_handle_simple_method_unicode_isnumeric = _inject_unicode_predicate
_handle_simple_method_unicode_isspace = _inject_unicode_predicate
_handle_simple_method_unicode_istitle = _inject_unicode_predicate
_handle_simple_method_unicode_isupper = _inject_unicode_predicate
# C function signature used for the single-character conversion calls
# injected by _inject_unicode_character_conversion():
#     Py_UNICODE <func>(Py_UNICODE uchar)
PyUnicode_uchar_conversion_func_type = PyrexTypes.CFuncType(
    PyrexTypes.c_py_unicode_type, [
        PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_unicode_type, None),
        ])
def
_inject_unicode_character_conversion
(
self
,
node
,
args
,
is_unbound_method
):
if
is_unbound_method
or
len
(
args
)
!=
1
:
return
node
ustring
=
args
[
0
]
if
not
isinstance
(
ustring
,
ExprNodes
.
CoerceToPyTypeNode
)
or
\
ustring
.
arg
.
type
is
not
PyrexTypes
.
c_py_unicode_type
:
return
node
uchar
=
ustring
.
arg
method_name
=
node
.
function
.
attribute
function_name
=
'Py_UNICODE_TO%s'
%
method_name
.
upper
()
func_call
=
self
.
_substitute_method_call
(
node
,
function_name
,
self
.
PyUnicode_uchar_conversion_func_type
,
method_name
,
is_unbound_method
,
[
uchar
])
if
node
.
type
.
is_pyobject
:
func_call
=
func_call
.
coerce_to_pyobject
(
self
.
current_env
)
return
func_call
# The three case conversions share one implementation; the method name
# is read from the call node itself and turned into Py_UNICODE_TO<NAME>.
# NOTE(review): presumably these names are looked up by the method call
# dispatcher of this transform (not visible here) - confirm.
_handle_simple_method_unicode_lower = _inject_unicode_character_conversion
_handle_simple_method_unicode_upper = _inject_unicode_character_conversion
_handle_simple_method_unicode_title = _inject_unicode_character_conversion
PyUnicode_Splitlines_func_type
=
PyrexTypes
.
CFuncType
(
Builtin
.
list_type
,
[
PyrexTypes
.
CFuncTypeArg
(
"str"
,
Builtin
.
unicode_type
,
None
),
...
...
@@ -2196,6 +2261,18 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
args
[
arg_index
]
=
args
[
arg_index
].
coerce_to_boolean
(
self
.
current_env
())
# C helper that is used in place of Py_UNICODE_ISTITLE() when optimising
# unicode.istitle() on a single character.
py_unicode_istitle_utility_code = UtilityCode(
# Py_UNICODE_ISTITLE() doesn't match unicode.istitle(), as the latter
# additionally accepts characters that comply with Py_UNICODE_ISUPPER().
proto = '''
static CYTHON_INLINE int __Pyx_Py_UNICODE_ISTITLE(Py_UNICODE uchar); /* proto */
''',
impl = '''
static CYTHON_INLINE int __Pyx_Py_UNICODE_ISTITLE(Py_UNICODE uchar) {
return Py_UNICODE_ISTITLE(uchar) || Py_UNICODE_ISUPPER(uchar);
}
''')
unicode_tailmatch_utility_code
=
UtilityCode
(
# Python's unicode.startswith() and unicode.endswith() support a
# tuple of prefixes/suffixes, whereas it's much more common to
...
...
tests/run/py_unicode_type.pyx
View file @
9cb1c32f
...
...
@@ -77,3 +77,45 @@ def unicode_ordinal(Py_UNICODE i):
ValueError: only single character unicode strings can be converted to Py_UNICODE, got length 2
"""
return
i
# Checks the predicate methods of a single Py_UNICODE character.
# NOTE(review): the two decorators presumably assert that the compiled
# tree contains a C-API call node and no generic Python call node, i.e.
# that every call below was optimised - confirm against the test runner.
@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_type_methods(Py_UNICODE uchar):
    """
    >>> unicode_type_methods(ord('A'))
    [True, True, False, False, False, False, False, True, True]
    >>> unicode_type_methods(ord('a'))
    [True, True, False, False, True, False, False, False, False]
    >>> unicode_type_methods(ord('8'))
    [True, False, True, True, False, True, False, False, False]
    >>> unicode_type_methods(ord('\\t'))
    [False, False, False, False, False, False, True, False, False]
    """
    # The result order matches the doctest lists above: isalnum, isalpha,
    # isdecimal, isdigit, islower, isnumeric, isspace, istitle, isupper.
    return [
        # character types
        uchar.isalnum(),
        uchar.isalpha(),
        uchar.isdecimal(),
        uchar.isdigit(),
        uchar.islower(),
        uchar.isnumeric(),
        uchar.isspace(),
        uchar.istitle(),
        uchar.isupper(),
        ]
# Checks the case conversion methods of a single Py_UNICODE character.
# NOTE(review): the two decorators presumably assert that the compiled
# tree contains a C-API call node and no generic Python call node, i.e.
# that every call below was optimised - confirm against the test runner.
@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_methods(Py_UNICODE uchar):
    """
    >>> unicode_methods(ord('A')) == ['a', 'A', 'A']
    True
    >>> unicode_methods(ord('a')) == ['a', 'A', 'A']
    True
    """
    # The result order matches the doctest lists above: lower, upper, title.
    return [
        # character conversion
        uchar.lower(),
        uchar.upper(),
        uchar.title(),
        ]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment