Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
nexedi
cython
Commits
ec5a7ff2
Commit
ec5a7ff2
authored
5 years ago
by
Robert Bradshaw
Committed by
Stefan Behnel
5 years ago
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Recognize that the default encoding is always utf-8 in Python 3.
This fixes Github issue #2819.
parent
0962e3ff
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
154 additions
and
1 deletion
+154
-1
Cython/Compiler/ModuleNode.py
Cython/Compiler/ModuleNode.py
+4
-1
tests/run/cpp_stl_string_utf8_auto_encoding.pyx
tests/run/cpp_stl_string_utf8_auto_encoding.pyx
+150
-0
No files found.
Cython/Compiler/ModuleNode.py
View file @
ec5a7ff2
...
...
@@ -693,10 +693,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
if
c_string_type
not
in
(
'bytes'
,
'bytearray'
)
and
not
c_string_encoding
:
error
(
self
.
pos
,
"a default encoding must be provided if c_string_type is not a byte type"
)
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s'
%
int
(
c_string_encoding
==
'ascii'
))
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 %s'
%
int
(
c_string_encoding
.
replace
(
'-'
,
''
).
lower
()
==
'utf8'
))
if
c_string_encoding
==
'default'
:
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 1'
)
else
:
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0'
)
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT '
'(PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8)'
)
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING "%s"'
%
c_string_encoding
)
if
c_string_type
==
'bytearray'
:
c_string_func_name
=
'ByteArray'
...
...
This diff is collapsed.
Click to expand it.
tests/run/cpp_stl_string_utf8_auto_encoding.pyx
0 → 100644
View file @
ec5a7ff2
# mode: run
# tag: cpp, werror
# cython: c_string_encoding=utf-8, c_string_type=unicode
cimport
cython
from
libcpp.string
cimport
string
# Shared fixtures referenced by the doctests in this module.
b_asdf = b'asdf'  # byte-string input
s_asdf = 'asdf'   # plain str literal; its type is what test_str_cast is checked against
u_asdf = u'asdf'  # unicode input, also the expected value of most conversions
u_s = u's'        # expected suffix after push_back; u_s[:0] is the empty unicode string
def test_conversion(py_obj):
    """
    Coerce ``py_obj`` into a C++ ``string`` and return that string.

    >>> test_conversion(b_asdf) == u_asdf or test_conversion(b_asdf)
    True
    >>> test_conversion(u_asdf) == u_asdf or test_conversion(u_asdf)
    True
    >>> test_conversion(123) # doctest: +ELLIPSIS
    Traceback (most recent call last):
    TypeError: expected ..., int found
    """
    # The coercion runs before anything else, so unsupported objects
    # raise here and never reach the length check.
    cdef string converted = py_obj
    assert converted.length() == <size_t>len(py_obj), (
        '%d != %d' % (len(py_obj), converted.length()))
    return converted
def test_empty(py_obj):
    """
    Report whether the C++ ``string`` built from ``py_obj`` is empty.

    >>> test_empty('')
    True
    >>> test_empty('abc')
    False
    >>> test_empty(u_asdf[:0])
    True
    >>> test_empty(u_asdf)
    False
    """
    cdef string converted = py_obj
    cdef bint is_empty = converted.empty()
    return is_empty
def test_push_back(a):
    """
    Append the character ``'s'`` to the C++ ``string`` built from ``a``.

    >>> test_push_back(b_asdf) == u_asdf + u_s
    True
    >>> test_push_back(u_asdf) == u_asdf + u_s
    True
    """
    cdef string grown = a
    # push_back takes a single C char, hence the explicit cast.
    grown.push_back(<char>ord('s'))
    return grown
def test_clear(a):
    """
    Build a C++ ``string`` from ``a``, clear it, and return the result.

    >>> test_clear(u_asdf) == u_s[:0]
    True
    >>> test_clear(b_asdf) == u_s[:0]
    True
    """
    cdef string emptied = a
    emptied.clear()
    return emptied
def test_assign(char *a):
    """
    Construct a C++ ``string`` from ``a``, overwrite it with ``"ggg"`` via
    ``assign()``, and return its ``c_str()``.

    >>> test_assign(b_asdf) == 'ggg'
    True
    """
    cdef string target
    target = string(a)
    target.assign(<char *>"ggg")
    return target.c_str()
def test_bytes_cast(a):
    """
    Convert ``a`` to a C++ ``string`` and return it cast to ``bytes``.

    >>> b = test_bytes_cast(b'abc')
    >>> isinstance(b, bytes)
    True
    >>> print(b.decode('ascii'))
    abc
    >>> b = test_bytes_cast(b'abc\\xe4\\xfc')
    >>> isinstance(b, bytes)
    True
    >>> len(b)
    5
    >>> print(b[:3].decode('ascii'))
    abc
    >>> print(ord(b[3:4]))
    228
    >>> print(ord(b[4:5]))
    252
    """
    cdef string raw = a
    assert <size_t>len(a) == raw.length(), (
        "%d != %d" % (raw.length(), len(a)))
    return <bytes>raw
def test_bytearray_cast(a):
    """
    Convert ``a`` to a C++ ``string`` and return it cast to ``bytearray``.

    >>> b = test_bytearray_cast(b'abc')
    >>> isinstance(b, bytearray)
    True
    >>> print(b.decode('ascii'))
    abc
    >>> b = test_bytearray_cast(b'abc\\xe4\\xfc')
    >>> isinstance(b, bytearray)
    True
    >>> len(b)
    5
    >>> print(b[:3].decode('ascii'))
    abc
    >>> print(ord(b[3:4]))
    228
    >>> print(ord(b[4:5]))
    252
    """
    cdef string raw = a
    assert <size_t>len(a) == raw.length(), (
        "%d != %d" % (raw.length(), len(a)))
    return <bytearray>raw
def test_unicode_cast(a):
    """
    Convert ``a`` to a C++ ``string`` and return it cast to ``unicode``.

    >>> u = test_unicode_cast(b'abc')
    >>> type(u) is type(u_asdf) or type(u)
    True
    >>> print(u)
    abc
    """
    cdef string raw = a
    assert <size_t>len(a) == raw.length(), (
        "%d != %d" % (raw.length(), len(a)))
    return <unicode>raw
def test_str_cast(a):
    """
    Convert ``a`` to a C++ ``string`` and return it cast to ``str``.

    >>> s = test_str_cast(b'abc')
    >>> type(s) is type(s_asdf) or type(s)
    True
    >>> print(s)
    abc
    """
    cdef string raw = a
    assert <size_t>len(a) == raw.length(), (
        "%d != %d" % (raw.length(), len(a)))
    return <str>raw
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment