Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cython
Commits
901cfed4
Commit
901cfed4
authored
Feb 03, 2019
by
Robert Bradshaw
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Recognize that the default encoding is always utf-8 in Python 3.
This fixes GitHub issue #2819.
parent
b00fd6ae
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
154 additions
and
1 deletion
+154
-1
Cython/Compiler/ModuleNode.py
Cython/Compiler/ModuleNode.py
+4
-1
tests/run/cpp_stl_string_utf8_auto_encoding.pyx
tests/run/cpp_stl_string_utf8_auto_encoding.pyx
+150
-0
No files found.
Cython/Compiler/ModuleNode.py
View file @
901cfed4
...
@@ -693,10 +693,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
...
@@ -693,10 +693,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
if
c_string_type
not
in
(
'bytes'
,
'bytearray'
)
and
not
c_string_encoding
:
if
c_string_type
not
in
(
'bytes'
,
'bytearray'
)
and
not
c_string_encoding
:
error
(
self
.
pos
,
"a default encoding must be provided if c_string_type is not a byte type"
)
error
(
self
.
pos
,
"a default encoding must be provided if c_string_type is not a byte type"
)
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s'
%
int
(
c_string_encoding
==
'ascii'
))
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s'
%
int
(
c_string_encoding
==
'ascii'
))
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 %s'
%
int
(
c_string_encoding
.
replace
(
'-'
,
''
).
lower
()
==
'utf8'
))
if
c_string_encoding
==
'default'
:
if
c_string_encoding
==
'default'
:
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 1'
)
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 1'
)
else
:
else
:
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0'
)
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT '
'(PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8)'
)
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING "%s"'
%
c_string_encoding
)
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING "%s"'
%
c_string_encoding
)
if
c_string_type
==
'bytearray'
:
if
c_string_type
==
'bytearray'
:
c_string_func_name
=
'ByteArray'
c_string_func_name
=
'ByteArray'
...
...
tests/run/cpp_stl_string_utf8_auto_encoding.pyx
0 → 100644
View file @
901cfed4
# mode: run
# tag: cpp, werror
# cython: c_string_encoding=utf-8, c_string_type=unicode
cimport
cython
from
libcpp.string
cimport
string
# Shared doctest fixtures: the same ASCII text spelled as a bytes literal,
# a prefix-less string literal and an explicit unicode literal.
b_asdf = b'asdf'
s_asdf = 'asdf'
u_asdf = u'asdf'
# Single-character unicode value used by the doctests to build expected
# results (appended to u_asdf, or sliced down to an empty string).
u_s = u's'
def test_conversion(py_obj):
    """
    Coerce *py_obj* into a C++ ``string`` and return it coerced back.

    >>> test_conversion(b_asdf) == u_asdf or test_conversion(b_asdf)
    True
    >>> test_conversion(u_asdf) == u_asdf or test_conversion(u_asdf)
    True
    >>> test_conversion(123) # doctest: +ELLIPSIS
    Traceback (most recent call last):
    TypeError: expected ..., int found
    """
    # The coercion must come first: a non-string argument has to fail here
    # with the TypeError shown above, before len() is applied to it.
    cdef string converted = py_obj
    cdef size_t cpp_length = converted.length()
    # Cast to size_t so both sides of the comparison are unsigned.
    assert <size_t>len(py_obj) == cpp_length, '%d != %d' % (len(py_obj), cpp_length)
    return converted
def test_empty(py_obj):
    """
    Report whether the C++ ``string`` built from *py_obj* is empty.

    >>> test_empty('')
    True
    >>> test_empty('abc')
    False
    >>> test_empty(u_asdf[:0])
    True
    >>> test_empty(u_asdf)
    False
    """
    cdef string cpp_text = py_obj
    cdef bint is_empty = cpp_text.empty()
    return is_empty
def test_push_back(a):
    """
    Append the character ``'s'`` to a C++ ``string`` copy of *a*.

    >>> test_push_back(b_asdf) == u_asdf + u_s
    True
    >>> test_push_back(u_asdf) == u_asdf + u_s
    True
    """
    cdef string buf = a
    cdef char extra = <char>ord('s')
    buf.push_back(extra)
    return buf
def test_clear(a):
    """
    Convert *a* to a C++ ``string``, clear it, and return the result.

    >>> test_clear(u_asdf) == u_s[:0]
    True
    >>> test_clear(b_asdf) == u_s[:0]
    True
    """
    cdef string buf = a
    buf.clear()
    return buf
def test_assign(char *a):
    """
    Build a C++ ``string`` from *a*, overwrite it with ``"ggg"`` via
    ``assign()``, and return its ``c_str()`` contents.

    >>> test_assign(b_asdf) == 'ggg'
    True
    """
    cdef string target = string(a)
    # Keep the explicit cast so the literal reaches assign() as a C string.
    cdef char *replacement = <char *>"ggg"
    target.assign(replacement)
    return target.c_str()
def test_bytes_cast(a):
    """
    Convert *a* to a C++ ``string`` and return it cast to ``bytes``.

    >>> b = test_bytes_cast(b'abc')
    >>> isinstance(b, bytes)
    True
    >>> print(b.decode('ascii'))
    abc
    >>> b = test_bytes_cast(b'abc\\xe4\\xfc')
    >>> isinstance(b, bytes)
    True
    >>> len(b)
    5
    >>> print(b[:3].decode('ascii'))
    abc
    >>> print(ord(b[3:4]))
    228
    >>> print(ord(b[4:5]))
    252
    """
    cdef string payload = a
    cdef size_t cpp_length = payload.length()
    # The conversion must not change the number of bytes.
    assert cpp_length == <size_t>len(a), "%d != %d" % (cpp_length, len(a))
    return <bytes>payload
def test_bytearray_cast(a):
    """
    Convert *a* to a C++ ``string`` and return it cast to ``bytearray``.

    >>> b = test_bytearray_cast(b'abc')
    >>> isinstance(b, bytearray)
    True
    >>> print(b.decode('ascii'))
    abc
    >>> b = test_bytearray_cast(b'abc\\xe4\\xfc')
    >>> isinstance(b, bytearray)
    True
    >>> len(b)
    5
    >>> print(b[:3].decode('ascii'))
    abc
    >>> print(ord(b[3:4]))
    228
    >>> print(ord(b[4:5]))
    252
    """
    cdef string payload = a
    cdef size_t cpp_length = payload.length()
    # The conversion must not change the number of bytes.
    assert cpp_length == <size_t>len(a), "%d != %d" % (cpp_length, len(a))
    return <bytearray>payload
def test_unicode_cast(a):
    """
    Convert *a* to a C++ ``string`` and return it cast to ``unicode``.

    >>> u = test_unicode_cast(b'abc')
    >>> type(u) is type(u_asdf) or type(u)
    True
    >>> print(u)
    abc
    """
    cdef string payload = a
    cdef size_t cpp_length = payload.length()
    # The conversion must not change the length for this input.
    assert cpp_length == <size_t>len(a), "%d != %d" % (cpp_length, len(a))
    return <unicode>payload
def test_str_cast(a):
    """
    Convert *a* to a C++ ``string`` and return it cast to ``str``.

    >>> s = test_str_cast(b'abc')
    >>> type(s) is type(s_asdf) or type(s)
    True
    >>> print(s)
    abc
    """
    cdef string payload = a
    cdef size_t cpp_length = payload.length()
    # The conversion must not change the length for this input.
    assert cpp_length == <size_t>len(a), "%d != %d" % (cpp_length, len(a))
    return <str>payload
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment