Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Gwenaël Samain
cython
Commits
85ee8e60
Commit
85ee8e60
authored
11 years ago
by
Robert Bradshaw
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
support 'default' encoding
parent
2fe2ca90
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
102 additions
and
56 deletions
+102
-56
Cython/Compiler/ExprNodes.py
Cython/Compiler/ExprNodes.py
+2
-2
Cython/Compiler/ModuleNode.py
Cython/Compiler/ModuleNode.py
+3
-2
Cython/Utility/TypeConversion.c
Cython/Utility/TypeConversion.c
+41
-9
tests/run/unicode_ascii_encoding.pyx
tests/run/unicode_ascii_encoding.pyx
+43
-0
tests/run/unicode_default_encoding.pyx
tests/run/unicode_default_encoding.pyx
+13
-43
No files found.
Cython/Compiler/ExprNodes.py
View file @
85ee8e60
...
...
@@ -82,10 +82,10 @@ def find_coercion_error(type_tuple, default, env):
and
env
.
directives
[
'c_string_encoding'
]):
if
type_tuple
[
1
].
is_pyobject
:
return
default
elif
env
.
directives
[
'c_string_encoding'
]
==
'ascii'
:
elif
env
.
directives
[
'c_string_encoding'
]
in
(
'ascii'
,
'default'
)
:
return
default
else
:
return
"'%s' objects do not support coercion to C types with non-ascii
default
encoding"
%
type_tuple
[
0
].
name
return
"'%s' objects do not support coercion to C types with non-ascii
or non-default c_string_
encoding"
%
type_tuple
[
0
].
name
else
:
return
err
...
...
This diff is collapsed.
Click to expand it.
Cython/Compiler/ModuleNode.py
View file @
85ee8e60
...
...
@@ -562,6 +562,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
if
c_string_type
!=
'bytes'
and
not
c_string_encoding
:
error
(
self
.
pos
,
"a default encoding must be provided if c_string_type != bytes"
)
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s'
%
int
(
c_string_encoding
==
'ascii'
))
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT %s'
%
int
(
c_string_encoding
==
'default'
))
code
.
putln
(
'#define __PYX_DEFAULT_STRING_ENCODING "%s"'
%
c_string_encoding
)
code
.
putln
(
'#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString'
%
c_string_type
.
title
())
code
.
putln
(
'#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize'
%
c_string_type
.
title
())
...
...
@@ -1898,8 +1899,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code
.
putln
(
"/*--- Initialize various global constants etc. ---*/"
)
code
.
putln
(
code
.
error_goto_if_neg
(
"__Pyx_InitGlobals()"
,
self
.
pos
))
code
.
putln
(
"#if
def __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
"
)
code
.
putln
(
"if (__Pyx_init_sys_getdefaultencoding_
not_ascii
() < 0) %s"
%
code
.
error_goto
(
self
.
pos
))
code
.
putln
(
"#if
PY_VERSION_HEX < 0x03000000 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)
"
)
code
.
putln
(
"if (__Pyx_init_sys_getdefaultencoding_
params
() < 0) %s"
%
code
.
error_goto
(
self
.
pos
))
code
.
putln
(
"#endif"
)
__main__name
=
code
.
globalstate
.
get_py_string_const
(
...
...
This diff is collapsed.
Click to expand it.
Cython/Utility/TypeConversion.c
View file @
85ee8e60
...
...
@@ -42,7 +42,7 @@ static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*);
#if PY_VERSION_HEX < 0x03000000 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
static
int
__Pyx_sys_getdefaultencoding_not_ascii
;
static
int
__Pyx_init_sys_getdefaultencoding_
not_ascii
()
{
static
int
__Pyx_init_sys_getdefaultencoding_
params
()
{
PyObject
*
sys
=
NULL
;
PyObject
*
default_encoding
=
NULL
;
PyObject
*
ascii_chars_u
=
NULL
;
...
...
@@ -84,8 +84,34 @@ bad:
Py_XDECREF
(
ascii_chars_b
);
return
-
1
;
}
#endif
#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
#undef __PYX_DEFAULT_STRING_ENCODING
#if PY_VERSION_HEX < 0x03000000
static
char
*
__PYX_DEFAULT_STRING_ENCODING
;
/*
 * Look up sys.getdefaultencoding() and store a heap-allocated copy of the
 * returned name in the file-level pointer __PYX_DEFAULT_STRING_ENCODING.
 *
 * Returns 0 on success and -1 on failure.  The copied string is not freed
 * by this function.
 */
static int __Pyx_init_sys_getdefaultencoding_params() {
    PyObject* sys = NULL;
    PyObject* default_encoding = NULL;
    char* default_encoding_c;

    sys = PyImport_ImportModule("sys");
    if (sys == NULL) goto bad;
    default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
    if (default_encoding == NULL) goto bad;
    default_encoding_c = PyBytes_AS_STRING(default_encoding);
    /* strlen() excludes the terminating NUL that strcpy() writes, so reserve
     * one extra byte; the original allocation was one byte too small. */
    __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1);
    if (__PYX_DEFAULT_STRING_ENCODING == NULL) {
        /* Set MemoryError so the -1 return is accompanied by an exception. */
        PyErr_NoMemory();
        goto bad;
    }
    strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
    Py_XDECREF(sys);
    Py_XDECREF(default_encoding);
    return 0;
bad:
    Py_XDECREF(sys);
    Py_XDECREF(default_encoding);
    return -1;
}
#else
#define __Pyx_init_sys_getdefaultencoding_not_ascii() 0
#define __PYX_DEFAULT_STRING_ENCODING "utf-8"
#endif
#endif
...
...
@@ -103,9 +129,9 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) {
}
static
CYTHON_INLINE
char
*
__Pyx_PyObject_AsStringAndSize
(
PyObject
*
o
,
Py_ssize_t
*
length
)
{
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
|| __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
if
(
#if PY_VERSION_HEX < 0x03000000
#if PY_VERSION_HEX < 0x03000000
&& __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
__Pyx_sys_getdefaultencoding_not_ascii
&&
#endif
PyUnicode_Check
(
o
))
{
...
...
@@ -113,20 +139,23 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_
// borrowed, cached reference
PyObject
*
defenc
=
_PyUnicode_AsDefaultEncodedString
(
o
,
NULL
);
if
(
!
defenc
)
return
NULL
;
char
*
maybe_ascii
=
PyBytes_AS_STRING
(
defenc
);
char
*
end
=
maybe_ascii
+
PyBytes_GET_SIZE
(
defenc
);
char
*
defenc_c
=
PyBytes_AS_STRING
(
defenc
);
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
char
*
end
=
defenc_c
+
PyBytes_GET_SIZE
(
defenc
);
char
*
c
;
for
(
c
=
maybe_ascii
;
c
<
end
;
c
++
)
{
for
(
c
=
defenc_c
;
c
<
end
;
c
++
)
{
if
((
unsigned
char
)
(
*
c
)
>=
128
)
{
// raise the error
PyUnicode_AsASCIIString
(
o
);
return
NULL
;
}
}
#endif
/*__PYX_DEFAULT_STRING_ENCODING_IS_ASCII*/
*
length
=
PyBytes_GET_SIZE
(
defenc
);
return
maybe_ascii
;
return
defenc_c
;
#else
/* PY_VERSION_HEX < 0x03030000 */
if
(
PyUnicode_READY
(
o
)
==
-
1
)
return
NULL
;
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
if
(
PyUnicode_IS_ASCII
(
o
))
{
// cached for the lifetime of the object
*
length
=
PyUnicode_GET_DATA_SIZE
(
o
);
...
...
@@ -136,9 +165,12 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_
PyUnicode_AsASCIIString
(
o
);
return
NULL
;
}
#else
/* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
return
PyUnicode_AsUTF8AndSize
(
o
,
length
);
#endif
/* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
#endif
/* PY_VERSION_HEX < 0x03030000 */
}
else
#endif
/* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
#endif
/* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
|| __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
*/
{
char
*
result
;
int
r
=
PyBytes_AsStringAndSize
(
o
,
&
result
,
length
);
...
...
This diff is collapsed.
Click to expand it.
tests/run/unicode_ascii_encoding.pyx
0 → 100644
View file @
85ee8e60
#cython: c_string_type = str
#cython: c_string_encoding = ascii
from
libc.string
cimport
strcmp
def as_objects(char* ascii_data):
    """
    >>> as_objects('abc')
    'abc'
    """
    # Check the Python type produced by each explicit cast of the C string.
    assert isinstance(<object>ascii_data, str)
    assert isinstance(<bytes>ascii_data, bytes)
    assert isinstance(<str>ascii_data, str)
    assert isinstance(<unicode>ascii_data, unicode)

    # Returning the char* exercises the implicit coercion to a Python object.
    return ascii_data
def from_object():
    """
    >>> from_object()
    """
    # One value of each Python string type holding the same three characters.
    cdef bytes bytes_value = b"abc"
    cdef str str_value = "abc"
    cdef unicode unicode_value = u"abc"

    # Each object is cast to char* and compared against the C literal.
    assert strcmp(<char*>bytes_value, "abc") == 0
    assert strcmp(<char*>str_value, "abc") == 0
    assert strcmp(<char*>unicode_value, "abc") == 0
def slice_as_objects(char* ascii_data, int start, int end):
    """
    >>> slice_as_objects('grok', 1, 3)
    'ro'
    """
    # Bounded slice [start:end]: check the type produced by each cast.
    assert isinstance(<object>ascii_data[start:end], str)
    assert isinstance(<bytes>ascii_data[start:end], bytes)
    assert isinstance(<str>ascii_data[start:end], str)
    assert isinstance(<unicode>ascii_data[start:end], unicode)

    # Open-ended slice [start:]: same checks without an explicit end index.
    assert isinstance(<object>ascii_data[start:], str)
    assert isinstance(<bytes>ascii_data[start:], bytes)
    assert isinstance(<str>ascii_data[start:], str)
    assert isinstance(<unicode>ascii_data[start:], unicode)

    # The returned slice is coerced implicitly to a Python object.
    return ascii_data[start:end]
This diff is collapsed.
Click to expand it.
tests/run/unicode_default_encoding.pyx
View file @
85ee8e60
#cython: c_string_type = str
#cython: c_string_encoding = ascii
from
libc.string
cimport
strcmp
def
as_objects
(
char
*
ascii_data
):
"""
>>> as_objects('abc')
'abc'
"""
assert
isinstance
(
<
object
>
ascii_data
,
str
)
assert
isinstance
(
<
bytes
>
ascii_data
,
bytes
)
assert
isinstance
(
<
str
>
ascii_data
,
str
)
assert
isinstance
(
<
unicode
>
ascii_data
,
unicode
)
return
ascii_data
def
from_object
():
"""
>>> from_object()
"""
cdef
bytes
b
=
b"abc"
cdef
str
s
=
"abc"
cdef
unicode
u
=
u"abc"
assert
strcmp
(
<
char
*>
b
,
"abc"
)
==
0
assert
strcmp
(
<
char
*>
s
,
"abc"
)
==
0
assert
strcmp
(
<
char
*>
u
,
"abc"
)
==
0
def
slice_as_objects
(
char
*
ascii_data
,
int
start
,
int
end
):
"""
>>> slice_as_objects('grok', 1, 3)
'ro'
"""
assert
isinstance
(
<
object
>
ascii_data
[
start
:
end
],
str
)
assert
isinstance
(
<
bytes
>
ascii_data
[
start
:
end
],
bytes
)
assert
isinstance
(
<
str
>
ascii_data
[
start
:
end
],
str
)
assert
isinstance
(
<
unicode
>
ascii_data
[
start
:
end
],
unicode
)
assert
isinstance
(
<
object
>
ascii_data
[
start
:],
str
)
assert
isinstance
(
<
bytes
>
ascii_data
[
start
:],
bytes
)
assert
isinstance
(
<
str
>
ascii_data
[
start
:],
str
)
assert
isinstance
(
<
unicode
>
ascii_data
[
start
:],
unicode
)
return
ascii_data
[
start
:
end
]
# cython: c_string_type = str
# cython: c_string_encoding = default
import
sys
if
sys
.
version_info
[
0
]
>=
3
:
__doc__
=
r"""
>>> as_objects("ab\xff") == "ab\xff"
True
>>> slice_as_objects("ab\xffd", 1, 4) == "b\xff"
True
"""
include
"unicode_ascii_encoding.pyx"
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment