Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Boxiang Sun
cython
Commits
85486ea2
Commit
85486ea2
authored
Mar 03, 2013
by
Nikita Nemkin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Full support for Py_UNICODE[] literals with non-BMP characters.
parent
3ce78016
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
36 additions
and
23 deletions
+36
-23
Cython/Compiler/Code.py
Cython/Compiler/Code.py
+12
-15
Cython/Compiler/ExprNodes.py
Cython/Compiler/ExprNodes.py
+0
-2
Cython/Compiler/StringEncoding.py
Cython/Compiler/StringEncoding.py
+14
-6
tests/run/py_unicode_strings.pyx
tests/run/py_unicode_strings.pyx
+10
-0
No files found.
Cython/Compiler/Code.py
View file @
85486ea2
...
...
@@ -778,15 +778,6 @@ class StringConst(object):
self.py_strings[key] = py_string
return py_string
class UnicodeConst(object):
"""Global info about a Py_UNICODE[] constant held by GlobalState.
"""
# cname string
# text EncodedString (unicode)
def __init__(self, cname, text):
self.cname = cname
self.text = text
class PyStringConst(object):
"""Global info about a Python string constant held by GlobalState.
...
...
@@ -1033,8 +1024,7 @@ class GlobalState(object):
try
:
c
=
self
.
unicode_const_index
[
text
]
except
KeyError
:
c
=
UnicodeConst
(
self
.
new_const_cname
(),
text
)
self
.
unicode_const_index
[
text
]
=
c
c
=
self
.
unicode_const_index
[
text
]
=
self
.
new_const_cname
()
return
c
def
get_py_string_const
(
self
,
text
,
identifier
=
None
,
...
...
@@ -1162,9 +1152,16 @@ class GlobalState(object):
for
py_string
in
c
.
py_strings
.
values
():
py_strings
.
append
((
c
.
cname
,
len
(
py_string
.
cname
),
py_string
))
for
c
in
self
.
unicode_const_index
.
values
():
decls_writer
.
putln
(
'static Py_UNICODE %s[] = { %s };'
%
(
c
.
cname
,
StringEncoding
.
encode_py_unicode_string
(
c
.
text
)))
for
c
,
cname
in
self
.
unicode_const_index
.
items
():
utf16_array
,
utf32_array
=
StringEncoding
.
encode_py_unicode_string
(
c
)
if
utf16_array
:
# Narrow and wide representations differ
decls_writer
.
putln
(
"#if Py_UNICODE_WIDE"
)
decls_writer
.
putln
(
"static Py_UNICODE %s[] = { %s };"
%
(
cname
,
utf32_array
))
if
utf16_array
:
decls_writer
.
putln
(
"#else"
)
decls_writer
.
putln
(
"static Py_UNICODE %s[] = { %s };"
%
(
cname
,
utf16_array
))
decls_writer
.
putln
(
"#endif"
)
if
py_strings
:
self
.
use_utility_code
(
UtilityCode
.
load_cached
(
"InitStrings"
,
"StringTools.c"
))
...
...
@@ -1461,7 +1458,7 @@ class CCodeWriter(object):
return
self
.
globalstate
.
get_string_const
(
text
).
cname
def
get_unicode_const
(
self
,
text
):
return
self
.
globalstate
.
get_unicode_const
(
text
)
.
cname
return
self
.
globalstate
.
get_unicode_const
(
text
)
def
get_py_string_const
(
self
,
text
,
identifier
=
None
,
is_str
=
False
,
unicode_value
=
None
):
...
...
Cython/Compiler/ExprNodes.py
View file @
85486ea2
...
...
@@ -1242,8 +1242,6 @@ class UnicodeNode(ConstNode):
if
self
.
type
.
is_pyobject
:
self
.
result_code
=
code
.
get_py_string_const
(
self
.
value
)
else
:
if
self
.
contains_surrogates
():
warning
(
self
.
pos
,
"Py_UNICODE* literals with characters outside BMP are not portable."
,
level
=
1
);
self
.
result_code
=
code
.
get_unicode_const
(
self
.
value
)
def
calculate_result_code
(
self
):
...
...
Cython/Compiler/StringEncoding.py
View file @
85486ea2
...
...
@@ -267,10 +267,18 @@ def split_string_literal(s, limit=2000):
def
encode_py_unicode_string
(
s
):
"""Create Py_UNICODE[] representation of a given unicode string.
"""
# Non-BMP characters will appear as surrogates, which is not compatible with
# wide (UTF-32) Python builds. UnicodeNode will warn the user about this.
utf32_array
=
array
.
array
(
'i'
,
s
.
encode
(
'UTF-32'
))
assert
utf32_array
.
itemsize
==
4
utf32_array
.
pop
(
0
)
# Remove BOM
utf32_array
.
append
(
0
)
# Add NULL terminator
for
c
in
utf32_array
:
if
c
>
65535
:
utf16_array
=
array
.
array
(
'H'
,
s
.
encode
(
'UTF-16'
))
utf16_array
.
pop
(
0
)
# Remove BOM
utf16_array
.
append
(
0
)
# Add NULL terminator
break
else
:
utf16_array
=
[]
a
=
array
.
array
(
'H'
,
s
.
encode
(
'UTF-16'
))
a
.
pop
(
0
)
# Remove BOM
a
.
append
(
0
)
# Add NULL terminator
return
u","
.
join
(
map
(
unicode
,
a
))
return
","
.
join
(
map
(
unicode
,
utf16_array
)),
","
.
join
(
map
(
unicode
,
utf32_array
))
tests/run/py_unicode_strings.pyx
View file @
85486ea2
...
...
@@ -19,6 +19,8 @@ cdef Py_UNICODE c_pu_arr[42]
cdef
LPWSTR
c_wstr
=
u"unicode
\
u1234
"
cdef
Py_UNICODE
*
c_pu_empty
=
u""
cdef
char
*
c_empty
=
""
cdef
unicode
uwide_literal
=
u'
\
U00020000
\
U00020001
'
cdef
Py_UNICODE
*
c_pu_wide_literal
=
u'
\
U00020000
\
U00020001
'
memcpy
(
c_pu_arr
,
c_pu_str
,
sizeof
(
Py_UNICODE
)
*
(
len
(
uobj
)
+
1
))
...
...
@@ -55,6 +57,12 @@ def test_c_to_python():
assert
sizeof
(
c_pu_arr
)
==
sizeof
(
Py_UNICODE
)
*
42
assert
sizeof
(
c_pu_str
)
==
sizeof
(
void
*
)
assert
c_pu_wide_literal
==
uwide_literal
if
sizeof
(
Py_UNICODE
)
>=
4
:
assert
len
(
c_pu_wide_literal
)
==
2
else
:
assert
len
(
c_pu_wide_literal
)
==
4
assert
u'unicode'
assert
not
u''
assert
c_pu_str
...
...
@@ -80,5 +88,7 @@ def test_python_to_c():
u
=
uobj
[
1
]
assert
Py_UNICODE_equal
(
<
Py_UNICODE
*>
u"n"
,
u
)
assert
Py_UNICODE_equal
(
uwide_literal
,
<
Py_UNICODE
*>
c_pu_wide_literal
)
assert
len
(
u"abc
\
0
"
)
==
4
assert
len
(
<
Py_UNICODE
*>
u"abc
\
0
"
)
==
3
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment