Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Gwenaël Samain
cython
Commits
f180a00f
Commit
f180a00f
authored
Jul 03, 2010
by
Stefan Behnel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix "Py_UNICODE in ..." against wide unicode literals on narrow Unicode platforms
parent
015b5ef0
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
51 additions
and
0 deletions
+51
-0
Cython/Compiler/ExprNodes.py
Cython/Compiler/ExprNodes.py
+17
-0
Cython/Compiler/Optimize.py
Cython/Compiler/Optimize.py
+6
-0
tests/run/inop.pyx
tests/run/inop.pyx
+28
-0
No files found.
Cython/Compiler/ExprNodes.py
View file @
f180a00f
...
@@ -969,6 +969,23 @@ class UnicodeNode(PyConstNode):
...
@@ -969,6 +969,23 @@ class UnicodeNode(PyConstNode):
def
can_coerce_to_char_literal
(
self
):
def
can_coerce_to_char_literal
(
self
):
return
len
(
self
.
value
)
==
1
return
len
(
self
.
value
)
==
1
def contains_surrogates(self):
    """Tell whether this literal involves surrogate code units.

    Returns True as soon as one character of ``self.value`` either
    lies above U+FFFF (only possible on a wide/UCS-4 build, where
    such a character would need two code units on a narrow build)
    or is a leading surrogate in the range U+D800..U+DBFF (the way
    such a character already appears on a narrow/UTF-16 build).
    Returns False otherwise, including for the empty string.

    Only the leading half of a surrogate pair is looked for: when
    it is present the trailing half (U+DC00..U+DFFF) is likely
    present as well, and a trailing half on its own cannot make up
    a surrogate pair.
    """
    return any(
        code_point > 0xFFFF or 0xD800 <= code_point <= 0xDBFF
        for code_point in map(ord, self.value))
def
generate_evaluation_code
(
self
,
code
):
def
generate_evaluation_code
(
self
,
code
):
self
.
result_code
=
code
.
get_py_string_const
(
self
.
value
)
self
.
result_code
=
code
.
get_py_string_const
(
self
.
value
)
...
...
Cython/Compiler/Optimize.py
View file @
f180a00f
...
@@ -600,6 +600,12 @@ class SwitchTransform(Visitor.VisitorTransform):
...
@@ -600,6 +600,12 @@ class SwitchTransform(Visitor.VisitorTransform):
not_in
=
cond
.
operator
==
'not_in'
not_in
=
cond
.
operator
==
'not_in'
if
not_in
and
not
allow_not_in
:
if
not_in
and
not
allow_not_in
:
return
self
.
NO_MATCH
return
self
.
NO_MATCH
if
isinstance
(
cond
.
operand2
,
ExprNodes
.
UnicodeNode
)
and
\
cond
.
operand2
.
contains_surrogates
():
# dealing with surrogates leads to different
# behaviour on wide and narrow Unicode
# platforms => refuse to optimise this case
return
self
.
NO_MATCH
# this looks somewhat silly, but it does the right
# this looks somewhat silly, but it does the right
# checks for NameNode and AttributeNode
# checks for NameNode and AttributeNode
if
is_common_value
(
cond
.
operand1
,
cond
.
operand1
):
if
is_common_value
(
cond
.
operand1
,
cond
.
operand1
):
...
...
tests/run/inop.pyx
View file @
f180a00f
...
@@ -195,6 +195,34 @@ def m_unicode_literal(Py_UNICODE a):
...
@@ -195,6 +195,34 @@ def m_unicode_literal(Py_UNICODE a):
cdef
int
result
=
a
in
u'abc
\
0
defg
\
u1234
\
uF8D2
'
cdef
int
result
=
a
in
u'abc
\
0
defg
\
u1234
\
uF8D2
'
return
result
return
result
cdef
unicode
wide_unicode_character
=
u'
\
U0010FEDC
'
py_wide_unicode_character
=
wide_unicode_character
cdef
unicode
wide_unicode_character_surrogate1
=
u'
\
uDBFF
'
cdef
unicode
wide_unicode_character_surrogate2
=
u'
\
uDEDC
'
py_wide_unicode_character_surrogate1
=
wide_unicode_character_surrogate1
py_wide_unicode_character_surrogate2
=
wide_unicode_character_surrogate2
@cython.test_fail_if_path_exists("//SwitchStatNode")
@cython.test_assert_path_exists("//PrimaryCmpNode")
def m_wide_unicode_literal(Py_UNICODE a):
    """
    Check ``a in <literal>`` for a unicode literal that contains a
    character above U+FFFF (U+10FEDC).

    The plain-character examples call this function itself rather than
    the neighbouring m_unicode_literal(), so that they exercise the
    wide literal as well.

    Both branches of the platform check print two results: the narrow
    branch (sys.maxunicode == 65535) tests each surrogate half, the
    wide branch tests the full character and then echoes a literal 1,
    so the expected output is the same on either kind of build.

    >>> m_wide_unicode_literal(ord('f'))
    1
    >>> m_wide_unicode_literal(ord('X'))
    0
    >>> import sys
    >>> if sys.maxunicode == 65535:
    ...     m_wide_unicode_literal(ord(py_wide_unicode_character_surrogate1))
    ...     m_wide_unicode_literal(ord(py_wide_unicode_character_surrogate2))
    ... else:
    ...     m_wide_unicode_literal(ord(py_wide_unicode_character))
    ...     1
    1
    1
    """
    cdef int result = a in u'abc\0defg\u1234\uF8D2\U0010FEDC'
    return result
@
cython
.
test_assert_path_exists
(
"//SwitchStatNode"
)
@
cython
.
test_assert_path_exists
(
"//SwitchStatNode"
)
@
cython
.
test_fail_if_path_exists
(
"//BoolBinopNode"
,
"//PrimaryCmpNode"
)
@
cython
.
test_fail_if_path_exists
(
"//BoolBinopNode"
,
"//PrimaryCmpNode"
)
def
conditional_int
(
int
a
):
def
conditional_int
(
int
a
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment