Commit 85ee8e60 authored by Robert Bradshaw

support 'default' encoding

parent 2fe2ca90
...@@ -82,10 +82,10 @@ def find_coercion_error(type_tuple, default, env): ...@@ -82,10 +82,10 @@ def find_coercion_error(type_tuple, default, env):
and env.directives['c_string_encoding']): and env.directives['c_string_encoding']):
if type_tuple[1].is_pyobject: if type_tuple[1].is_pyobject:
return default return default
elif env.directives['c_string_encoding'] == 'ascii': elif env.directives['c_string_encoding'] in ('ascii', 'default'):
return default return default
else: else:
return "'%s' objects do not support coercion to C types with non-ascii default encoding" % type_tuple[0].name return "'%s' objects do not support coercion to C types with non-ascii or non-default c_string_encoding" % type_tuple[0].name
else: else:
return err return err
......
...@@ -562,6 +562,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -562,6 +562,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
if c_string_type != 'bytes' and not c_string_encoding: if c_string_type != 'bytes' and not c_string_encoding:
error(self.pos, "a default encoding must be provided if c_string_type != bytes") error(self.pos, "a default encoding must be provided if c_string_type != bytes")
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s' % int(c_string_encoding == 'ascii')) code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s' % int(c_string_encoding == 'ascii'))
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT %s' % int(c_string_encoding == 'default'))
code.putln('#define __PYX_DEFAULT_STRING_ENCODING "%s"' % c_string_encoding) code.putln('#define __PYX_DEFAULT_STRING_ENCODING "%s"' % c_string_encoding)
code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_type.title()) code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_type.title())
code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_type.title()) code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_type.title())
...@@ -1898,8 +1899,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -1898,8 +1899,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("/*--- Initialize various global constants etc. ---*/") code.putln("/*--- Initialize various global constants etc. ---*/")
code.putln(code.error_goto_if_neg("__Pyx_InitGlobals()", self.pos)) code.putln(code.error_goto_if_neg("__Pyx_InitGlobals()", self.pos))
code.putln("#ifdef __PYX_DEFAULT_STRING_ENCODING_IS_ASCII") code.putln("#if PY_VERSION_HEX < 0x03000000 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)")
code.putln("if (__Pyx_init_sys_getdefaultencoding_not_ascii() < 0) %s" % code.error_goto(self.pos)) code.putln("if (__Pyx_init_sys_getdefaultencoding_params() < 0) %s" % code.error_goto(self.pos))
code.putln("#endif") code.putln("#endif")
__main__name = code.globalstate.get_py_string_const( __main__name = code.globalstate.get_py_string_const(
......
...@@ -42,7 +42,7 @@ static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*); ...@@ -42,7 +42,7 @@ static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*);
#if PY_VERSION_HEX < 0x03000000 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII #if PY_VERSION_HEX < 0x03000000 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
static int __Pyx_sys_getdefaultencoding_not_ascii; static int __Pyx_sys_getdefaultencoding_not_ascii;
static int __Pyx_init_sys_getdefaultencoding_not_ascii() { static int __Pyx_init_sys_getdefaultencoding_params() {
PyObject* sys = NULL; PyObject* sys = NULL;
PyObject* default_encoding = NULL; PyObject* default_encoding = NULL;
PyObject* ascii_chars_u = NULL; PyObject* ascii_chars_u = NULL;
...@@ -84,8 +84,34 @@ bad: ...@@ -84,8 +84,34 @@ bad:
Py_XDECREF(ascii_chars_b); Py_XDECREF(ascii_chars_b);
return -1; return -1;
} }
#endif
#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
#undef __PYX_DEFAULT_STRING_ENCODING
#if PY_VERSION_HEX < 0x03000000
static char* __PYX_DEFAULT_STRING_ENCODING;
/*
 * Look up sys.getdefaultencoding() and store a heap-allocated copy of its
 * name in __PYX_DEFAULT_STRING_ENCODING.
 *
 * Returns 0 on success, -1 on failure.  On failure a Python exception is
 * set, either by the failing C-API call or by PyErr_NoMemory() when the
 * copy cannot be allocated.
 */
static int __Pyx_init_sys_getdefaultencoding_params() {
    PyObject* sys = NULL;
    PyObject* default_encoding = NULL;
    char* default_encoding_c;
    size_t encoding_size;
    int result = -1;

    sys = PyImport_ImportModule("sys");
    if (sys == NULL) goto done;
    default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
    if (default_encoding == NULL) goto done;
    default_encoding_c = PyBytes_AS_STRING(default_encoding);

    /* +1 for the terminating NUL: strlen() does not count it, and copying
       without room for it would write one byte past the allocation. */
    encoding_size = strlen(default_encoding_c) + 1;
    __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(encoding_size);
    if (__PYX_DEFAULT_STRING_ENCODING == NULL) {
        PyErr_NoMemory();
        goto done;
    }
    memcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c, encoding_size);
    result = 0;

done:
    /* Shared exit path: both references are released on success and failure. */
    Py_XDECREF(sys);
    Py_XDECREF(default_encoding);
    return result;
}
#else #else
#define __Pyx_init_sys_getdefaultencoding_not_ascii() 0 #define __PYX_DEFAULT_STRING_ENCODING "utf-8"
#endif
#endif #endif
...@@ -103,9 +129,9 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) { ...@@ -103,9 +129,9 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) {
} }
static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII #if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
if ( if (
#if PY_VERSION_HEX < 0x03000000 #if PY_VERSION_HEX < 0x03000000 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
__Pyx_sys_getdefaultencoding_not_ascii && __Pyx_sys_getdefaultencoding_not_ascii &&
#endif #endif
PyUnicode_Check(o)) { PyUnicode_Check(o)) {
...@@ -113,20 +139,23 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_ ...@@ -113,20 +139,23 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_
// borrowed, cached reference // borrowed, cached reference
PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL);
if (!defenc) return NULL; if (!defenc) return NULL;
char* maybe_ascii = PyBytes_AS_STRING(defenc); char* defenc_c = PyBytes_AS_STRING(defenc);
char* end = maybe_ascii + PyBytes_GET_SIZE(defenc); #if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
char* end = defenc_c + PyBytes_GET_SIZE(defenc);
char* c; char* c;
for (c = maybe_ascii; c < end; c++) { for (c = defenc_c; c < end; c++) {
if ((unsigned char) (*c) >= 128) { if ((unsigned char) (*c) >= 128) {
// raise the error // raise the error
PyUnicode_AsASCIIString(o); PyUnicode_AsASCIIString(o);
return NULL; return NULL;
} }
} }
#endif /*__PYX_DEFAULT_STRING_ENCODING_IS_ASCII*/
*length = PyBytes_GET_SIZE(defenc); *length = PyBytes_GET_SIZE(defenc);
return maybe_ascii; return defenc_c;
#else /* PY_VERSION_HEX < 0x03030000 */ #else /* PY_VERSION_HEX < 0x03030000 */
if (PyUnicode_READY(o) == -1) return NULL; if (PyUnicode_READY(o) == -1) return NULL;
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
if (PyUnicode_IS_ASCII(o)) { if (PyUnicode_IS_ASCII(o)) {
// cached for the lifetime of the object // cached for the lifetime of the object
*length = PyUnicode_GET_DATA_SIZE(o); *length = PyUnicode_GET_DATA_SIZE(o);
...@@ -136,9 +165,12 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_ ...@@ -136,9 +165,12 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_
PyUnicode_AsASCIIString(o); PyUnicode_AsASCIIString(o);
return NULL; return NULL;
} }
#else /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
return PyUnicode_AsUTF8AndSize(o, length);
#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
#endif /* PY_VERSION_HEX < 0x03030000 */ #endif /* PY_VERSION_HEX < 0x03030000 */
} else } else
#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */ #endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT */
{ {
char* result; char* result;
int r = PyBytes_AsStringAndSize(o, &result, length); int r = PyBytes_AsStringAndSize(o, &result, length);
......
#cython: c_string_type = str
#cython: c_string_encoding = ascii
from libc.string cimport strcmp
# Checks the Python types produced when a C `char*` is coerced or cast to an
# object, under the `#cython:` directives that precede these definitions
# (c_string_type = str, c_string_encoding = ascii).
def as_objects(char* ascii_data):
"""
>>> as_objects('abc')
'abc'
"""
# A plain <object> cast is expected to produce str here.
assert isinstance(<object>ascii_data, str)
# Explicit casts are expected to produce exactly the requested string type.
assert isinstance(<bytes>ascii_data, bytes)
assert isinstance(<str>ascii_data, str)
assert isinstance(<unicode>ascii_data, unicode)
# Returned via the implicit char* -> object coercion; the doctest expects 'abc'.
return ascii_data
# Checks the reverse direction: bytes, str and unicode objects holding "abc"
# are each cast to `char*` and compared against the C literal "abc".
def from_object():
"""
>>> from_object()
"""
cdef bytes b = b"abc"
cdef str s = "abc"
cdef unicode u = u"abc"
# strcmp() returns 0 when both C strings are equal.
assert strcmp(<char*>b, "abc") == 0
assert strcmp(<char*>s, "abc") == 0
assert strcmp(<char*>u, "abc") == 0
# Same type checks as as_objects(), applied to slices of the `char*` rather
# than to the whole string.
def slice_as_objects(char* ascii_data, int start, int end):
"""
>>> slice_as_objects('grok', 1, 3)
'ro'
"""
# Slices bounded on both sides: [start:end].
assert isinstance(<object>ascii_data[start:end], str)
assert isinstance(<bytes>ascii_data[start:end], bytes)
assert isinstance(<str>ascii_data[start:end], str)
assert isinstance(<unicode>ascii_data[start:end], unicode)
# Open-ended slices: [start:] with no explicit end index.
assert isinstance(<object>ascii_data[start:], str)
assert isinstance(<bytes>ascii_data[start:], bytes)
assert isinstance(<str>ascii_data[start:], str)
assert isinstance(<unicode>ascii_data[start:], unicode)
# The doctest expects 'ro' for ('grok', 1, 3).
return ascii_data[start:end]
#cython: c_string_type = str # cython: c_string_type = str
#cython: c_string_encoding = ascii # cython: c_string_encoding = default
from libc.string cimport strcmp import sys
if sys.version_info[0] >= 3:
def as_objects(char* ascii_data): __doc__ = r"""
""" >>> as_objects("ab\xff") == "ab\xff"
>>> as_objects('abc') True
'abc' >>> slice_as_objects("ab\xffd", 1, 4) == "b\xff"
""" True
assert isinstance(<object>ascii_data, str)
assert isinstance(<bytes>ascii_data, bytes)
assert isinstance(<str>ascii_data, str)
assert isinstance(<unicode>ascii_data, unicode)
return ascii_data
def from_object():
"""
>>> from_object()
""" """
cdef bytes b = b"abc"
cdef str s = "abc"
cdef unicode u = u"abc"
assert strcmp(<char*>b, "abc") == 0
assert strcmp(<char*>s, "abc") == 0
assert strcmp(<char*>u, "abc") == 0
def slice_as_objects(char* ascii_data, int start, int end):
"""
>>> slice_as_objects('grok', 1, 3)
'ro'
"""
assert isinstance(<object>ascii_data[start:end], str)
assert isinstance(<bytes>ascii_data[start:end], bytes)
assert isinstance(<str>ascii_data[start:end], str)
assert isinstance(<unicode>ascii_data[start:end], unicode)
assert isinstance(<object>ascii_data[start:], str)
assert isinstance(<bytes>ascii_data[start:], bytes)
assert isinstance(<str>ascii_data[start:], str)
assert isinstance(<unicode>ascii_data[start:], unicode)
return ascii_data[start:end] include "unicode_ascii_encoding.pyx"
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment