Commit 85ee8e60 authored by Robert Bradshaw

support 'default' encoding

parent 2fe2ca90
......@@ -82,10 +82,10 @@ def find_coercion_error(type_tuple, default, env):
and env.directives['c_string_encoding']):
if type_tuple[1].is_pyobject:
return default
elif env.directives['c_string_encoding'] == 'ascii':
elif env.directives['c_string_encoding'] in ('ascii', 'default'):
return default
else:
return "'%s' objects do not support coercion to C types with non-ascii default encoding" % type_tuple[0].name
return "'%s' objects do not support coercion to C types with non-ascii or non-default c_string_encoding" % type_tuple[0].name
else:
return err
......
......@@ -562,6 +562,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
if c_string_type != 'bytes' and not c_string_encoding:
error(self.pos, "a default encoding must be provided if c_string_type != bytes")
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s' % int(c_string_encoding == 'ascii'))
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT %s' % int(c_string_encoding == 'default'))
code.putln('#define __PYX_DEFAULT_STRING_ENCODING "%s"' % c_string_encoding)
code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_type.title())
code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_type.title())
......@@ -1898,8 +1899,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("/*--- Initialize various global constants etc. ---*/")
code.putln(code.error_goto_if_neg("__Pyx_InitGlobals()", self.pos))
code.putln("#ifdef __PYX_DEFAULT_STRING_ENCODING_IS_ASCII")
code.putln("if (__Pyx_init_sys_getdefaultencoding_not_ascii() < 0) %s" % code.error_goto(self.pos))
code.putln("#if PY_VERSION_HEX < 0x03000000 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)")
code.putln("if (__Pyx_init_sys_getdefaultencoding_params() < 0) %s" % code.error_goto(self.pos))
code.putln("#endif")
__main__name = code.globalstate.get_py_string_const(
......
......@@ -42,7 +42,7 @@ static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*);
#if PY_VERSION_HEX < 0x03000000 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
static int __Pyx_sys_getdefaultencoding_not_ascii;
static int __Pyx_init_sys_getdefaultencoding_not_ascii() {
static int __Pyx_init_sys_getdefaultencoding_params() {
PyObject* sys = NULL;
PyObject* default_encoding = NULL;
PyObject* ascii_chars_u = NULL;
......@@ -84,8 +84,34 @@ bad:
Py_XDECREF(ascii_chars_b);
return -1;
}
#endif
#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
/* In 'default' mode the encoding name is not a fixed string literal, so the
 * literal definition made earlier is replaced below. */
#undef __PYX_DEFAULT_STRING_ENCODING
#if PY_VERSION_HEX < 0x03000000
/* Heap-allocated copy of sys.getdefaultencoding(); assigned once by
 * __Pyx_init_sys_getdefaultencoding_params() and never freed here. */
static char* __PYX_DEFAULT_STRING_ENCODING;

/*
 * Query sys.getdefaultencoding() and store a private NUL-terminated copy of
 * its name in __PYX_DEFAULT_STRING_ENCODING.
 *
 * Returns 0 on success and -1 on failure.  On failure a Python exception is
 * set and __PYX_DEFAULT_STRING_ENCODING is left untouched.
 */
static int __Pyx_init_sys_getdefaultencoding_params() {
    PyObject* sys = NULL;
    PyObject* default_encoding = NULL;
    char* default_encoding_c;
    char* encoding_copy;

    sys = PyImport_ImportModule("sys");
    if (sys == NULL) goto bad;
    default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
    if (default_encoding == NULL) goto bad;

    /* Use the checked accessor so an unexpected result type yields an
     * exception instead of reading through a mis-typed pointer. */
    default_encoding_c = PyBytes_AsString(default_encoding);
    if (default_encoding_c == NULL) goto bad;

    /* strlen() excludes the terminating NUL that strcpy() writes, so the
     * buffer needs one extra byte. */
    encoding_copy = (char*) malloc(strlen(default_encoding_c) + 1);
    if (encoding_copy == NULL) {
        PyErr_NoMemory();
        goto bad;
    }
    strcpy(encoding_copy, default_encoding_c);
    /* Publish the global only once the copy is complete. */
    __PYX_DEFAULT_STRING_ENCODING = encoding_copy;

    Py_DECREF(sys);
    Py_DECREF(default_encoding);
    return 0;
bad:
    Py_XDECREF(sys);
    Py_XDECREF(default_encoding);
    return -1;
}
#else
/* Python 3: the encoding is fixed to UTF-8, so initialisation is a no-op.
 * The macro carries the same name as the Python 2 function above. */
#define __Pyx_init_sys_getdefaultencoding_params() 0
#define __PYX_DEFAULT_STRING_ENCODING "utf-8"
#endif
#endif
......@@ -103,9 +129,9 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) {
}
static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
if (
#if PY_VERSION_HEX < 0x03000000
#if PY_VERSION_HEX < 0x03000000 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
__Pyx_sys_getdefaultencoding_not_ascii &&
#endif
PyUnicode_Check(o)) {
......@@ -113,20 +139,23 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_
// borrowed, cached reference
PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL);
if (!defenc) return NULL;
char* maybe_ascii = PyBytes_AS_STRING(defenc);
char* end = maybe_ascii + PyBytes_GET_SIZE(defenc);
char* defenc_c = PyBytes_AS_STRING(defenc);
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
char* end = defenc_c + PyBytes_GET_SIZE(defenc);
char* c;
for (c = maybe_ascii; c < end; c++) {
for (c = defenc_c; c < end; c++) {
if ((unsigned char) (*c) >= 128) {
// raise the error
PyUnicode_AsASCIIString(o);
return NULL;
}
}
#endif /*__PYX_DEFAULT_STRING_ENCODING_IS_ASCII*/
*length = PyBytes_GET_SIZE(defenc);
return maybe_ascii;
return defenc_c;
#else /* PY_VERSION_HEX < 0x03030000 */
if (PyUnicode_READY(o) == -1) return NULL;
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
if (PyUnicode_IS_ASCII(o)) {
// cached for the lifetime of the object
*length = PyUnicode_GET_DATA_SIZE(o);
......@@ -136,9 +165,12 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_
PyUnicode_AsASCIIString(o);
return NULL;
}
#else /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
return PyUnicode_AsUTF8AndSize(o, length);
#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
#endif /* PY_VERSION_HEX < 0x03030000 */
} else
#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT */
{
char* result;
int r = PyBytes_AsStringAndSize(o, &result, length);
......
#cython: c_string_type = str
#cython: c_string_encoding = ascii
from libc.string cimport strcmp
# Check that a C ``char*`` argument coerces to each Python string type.
# The doctest below pins the type produced by the implicit coercion on return.
def as_objects(char* ascii_data):
"""
>>> as_objects('abc')
'abc'
"""
# <object> follows the c_string_type directive (str in this file); the other
# casts request a specific Python string type explicitly.
assert isinstance(<object>ascii_data, str)
assert isinstance(<bytes>ascii_data, bytes)
assert isinstance(<str>ascii_data, str)
assert isinstance(<unicode>ascii_data, unicode)
# Returning the char* from a ``def`` function coerces it to a Python object.
return ascii_data
# Check the opposite direction: bytes, str and unicode objects cast to
# ``char*`` must all expose the same C string data.
def from_object():
"""
>>> from_object()
"""
cdef bytes b = b"abc"
cdef str s = "abc"
cdef unicode u = u"abc"
# strcmp() returns 0 only when the char* obtained from each object is "abc".
assert strcmp(<char*>b, "abc") == 0
assert strcmp(<char*>s, "abc") == 0
assert strcmp(<char*>u, "abc") == 0
# Check that slices of a C ``char*`` coerce to each Python string type.
def slice_as_objects(char* ascii_data, int start, int end):
"""
>>> slice_as_objects('grok', 1, 3)
'ro'
"""
# Bounded slice [start:end].
assert isinstance(<object>ascii_data[start:end], str)
assert isinstance(<bytes>ascii_data[start:end], bytes)
assert isinstance(<str>ascii_data[start:end], str)
assert isinstance(<unicode>ascii_data[start:end], unicode)
# Open-ended slice [start:], which has no explicit end index.
assert isinstance(<object>ascii_data[start:], str)
assert isinstance(<bytes>ascii_data[start:], bytes)
assert isinstance(<str>ascii_data[start:], str)
assert isinstance(<unicode>ascii_data[start:], unicode)
# The returned slice is coerced to a Python object implicitly.
return ascii_data[start:end]
#cython: c_string_type = str
#cython: c_string_encoding = ascii
# cython: c_string_type = str
# cython: c_string_encoding = default
from libc.string cimport strcmp
# Check that a C ``char*`` argument coerces to each Python string type.
# The doctest below pins the type produced by the implicit coercion on return.
def as_objects(char* ascii_data):
"""
>>> as_objects('abc')
'abc'
"""
# <object> is expected to be str, matching the c_string_type directive; the
# other casts request a specific Python string type explicitly.
assert isinstance(<object>ascii_data, str)
assert isinstance(<bytes>ascii_data, bytes)
assert isinstance(<str>ascii_data, str)
assert isinstance(<unicode>ascii_data, unicode)
# Returning the char* from a ``def`` function coerces it to a Python object.
return ascii_data
def from_object():
"""
>>> from_object()
import sys
if sys.version_info[0] >= 3:
__doc__ = r"""
>>> as_objects("ab\xff") == "ab\xff"
True
>>> slice_as_objects("ab\xffd", 1, 4) == "b\xff"
True
"""
cdef bytes b = b"abc"
cdef str s = "abc"
cdef unicode u = u"abc"
assert strcmp(<char*>b, "abc") == 0
assert strcmp(<char*>s, "abc") == 0
assert strcmp(<char*>u, "abc") == 0
# Check that slices of a C ``char*`` coerce to each Python string type.
def slice_as_objects(char* ascii_data, int start, int end):
"""
>>> slice_as_objects('grok', 1, 3)
'ro'
"""
# Bounded slice [start:end].
assert isinstance(<object>ascii_data[start:end], str)
assert isinstance(<bytes>ascii_data[start:end], bytes)
assert isinstance(<str>ascii_data[start:end], str)
assert isinstance(<unicode>ascii_data[start:end], unicode)
# Open-ended slice [start:], which has no explicit end index.
assert isinstance(<object>ascii_data[start:], str)
assert isinstance(<bytes>ascii_data[start:], bytes)
assert isinstance(<str>ascii_data[start:], str)
assert isinstance(<unicode>ascii_data[start:], unicode)
# The returned slice is coerced to a Python object implicitly.
return ascii_data[start:end]
include "unicode_ascii_encoding.pyx"
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment