Commit ec5a7ff2 authored by Robert Bradshaw, committed by Stefan Behnel

Recognize that the default encoding is always utf-8 in Python 3.

This fixes GitHub issue #2819.
parent 0962e3ff
......@@ -693,10 +693,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
if c_string_type not in ('bytes', 'bytearray') and not c_string_encoding:
error(self.pos, "a default encoding must be provided if c_string_type is not a byte type")
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s' % int(c_string_encoding == 'ascii'))
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 %s' %
int(c_string_encoding.replace('-', '').lower() == 'utf8'))
if c_string_encoding == 'default':
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 1')
else:
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0')
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT '
'(PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8)')
code.putln('#define __PYX_DEFAULT_STRING_ENCODING "%s"' % c_string_encoding)
if c_string_type == 'bytearray':
c_string_func_name = 'ByteArray'
......
# mode: run
# tag: cpp, werror
# cython: c_string_encoding=utf-8, c_string_type=unicode
cimport cython
from libcpp.string cimport string
# Shared fixtures that the doctests below refer to by name.
b_asdf = b'asdf'  # bytes literal
s_asdf = 'asdf'  # unprefixed string literal
u_asdf = u'asdf'  # unicode literal
u_s = u's'  # unicode literal; the doctests also slice it as u_s[:0] to get an empty string
def test_conversion(py_obj):
    """
    Assign ``py_obj`` to a C++ ``string`` and hand the C++ string back.

    >>> test_conversion(b_asdf) == u_asdf or test_conversion(b_asdf)
    True
    >>> test_conversion(u_asdf) == u_asdf or test_conversion(u_asdf)
    True
    >>> test_conversion(123) # doctest: +ELLIPSIS
    Traceback (most recent call last):
    TypeError: expected ..., int found
    """
    # The assignment performs the Python -> C++ string coercion under test.
    cdef string cpp_str = py_obj
    # The C++ length has to agree with len() of the original Python object.
    assert <size_t>len(py_obj) == cpp_str.length(), (
        '%d != %d' % (len(py_obj), cpp_str.length()))
    # Returning the C++ string coerces it back into a Python object.
    return cpp_str
def test_empty(py_obj):
    """
    Report whether the C++ ``string`` built from ``py_obj`` is empty.

    >>> test_empty('')
    True
    >>> test_empty('abc')
    False
    >>> test_empty(u_asdf[:0])
    True
    >>> test_empty(u_asdf)
    False
    """
    # Coerce the Python object into a C++ string, then query it directly.
    cdef string cpp_str = py_obj
    return cpp_str.empty()
def test_push_back(a):
    """
    Append the character ``s`` to the C++ ``string`` built from ``a``.

    >>> test_push_back(b_asdf) == u_asdf + u_s
    True
    >>> test_push_back(u_asdf) == u_asdf + u_s
    True
    """
    cdef string cpp_str = a
    # push_back() takes a single C char, hence the explicit cast.
    cpp_str.push_back(<char>ord('s'))
    return cpp_str
def test_clear(a):
    """
    Build a C++ ``string`` from ``a``, clear it, and return the result.

    >>> test_clear(u_asdf) == u_s[:0]
    True
    >>> test_clear(b_asdf) == u_s[:0]
    True
    """
    cdef string cpp_str = a
    # After clear() the doctests expect the returned value to equal an empty string.
    cpp_str.clear()
    return cpp_str
def test_assign(char *a):
    """
    Construct a C++ ``string`` from ``a`` and overwrite it via ``assign()``.

    >>> test_assign(b_asdf) == 'ggg'
    True
    """
    # Start from the caller's char buffer ...
    cdef string cpp_str = string(a)
    # ... then replace the contents with a fixed C string literal.
    cpp_str.assign(<char *>"ggg")
    # Return the raw C string pointer rather than the C++ string object.
    return cpp_str.c_str()
def test_bytes_cast(a):
    """
    Return the C++ ``string`` built from ``a`` cast explicitly to ``bytes``.

    >>> b = test_bytes_cast(b'abc')
    >>> isinstance(b, bytes)
    True
    >>> print(b.decode('ascii'))
    abc
    >>> b = test_bytes_cast(b'abc\\xe4\\xfc')
    >>> isinstance(b, bytes)
    True
    >>> len(b)
    5
    >>> print(b[:3].decode('ascii'))
    abc
    >>> print(ord(b[3:4]))
    228
    >>> print(ord(b[4:5]))
    252
    """
    cdef string cpp_str = a
    # Sanity check: the C++ string length must match len() of the input.
    assert cpp_str.length() == <size_t>len(a), (
        "%d != %d" % (cpp_str.length(), len(a)))
    # The explicit cast selects the bytes conversion for the return value.
    return <bytes>cpp_str
def test_bytearray_cast(a):
    """
    Return the C++ ``string`` built from ``a`` cast explicitly to ``bytearray``.

    >>> b = test_bytearray_cast(b'abc')
    >>> isinstance(b, bytearray)
    True
    >>> print(b.decode('ascii'))
    abc
    >>> b = test_bytearray_cast(b'abc\\xe4\\xfc')
    >>> isinstance(b, bytearray)
    True
    >>> len(b)
    5
    >>> print(b[:3].decode('ascii'))
    abc
    >>> print(ord(b[3:4]))
    228
    >>> print(ord(b[4:5]))
    252
    """
    cdef string cpp_str = a
    # Sanity check: the C++ string length must match len() of the input.
    assert cpp_str.length() == <size_t>len(a), (
        "%d != %d" % (cpp_str.length(), len(a)))
    # The explicit cast selects the bytearray conversion for the return value.
    return <bytearray>cpp_str
def test_unicode_cast(a):
    """
    Return the C++ ``string`` built from ``a`` cast explicitly to ``unicode``.

    >>> u = test_unicode_cast(b'abc')
    >>> type(u) is type(u_asdf) or type(u)
    True
    >>> print(u)
    abc
    """
    cdef string cpp_str = a
    # Sanity check: the C++ string length must match len() of the input.
    assert cpp_str.length() == <size_t>len(a), (
        "%d != %d" % (cpp_str.length(), len(a)))
    # The explicit cast selects the unicode conversion for the return value.
    return <unicode>cpp_str
def test_str_cast(a):
    """
    Return the C++ ``string`` built from ``a`` cast explicitly to ``str``.

    >>> s = test_str_cast(b'abc')
    >>> type(s) is type(s_asdf) or type(s)
    True
    >>> print(s)
    abc
    """
    cdef string cpp_str = a
    # Sanity check: the C++ string length must match len() of the input.
    assert cpp_str.length() == <size_t>len(a), (
        "%d != %d" % (cpp_str.length(), len(a)))
    # The explicit cast selects the str conversion for the return value.
    return <str>cpp_str
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment