Commit 47fcb5b4 authored by Victor Stinner's avatar Victor Stinner

Issue #9542: Create PyUnicode_FSDecoder() function

It's a ParseTuple converter: decode bytes objects to unicode using
PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is.

 * Don't specify surrogateescape error handler in the comments nor the
   documentation, but PyUnicode_DecodeFSDefaultAndSize() and
   PyUnicode_EncodeFSDefault() because these functions use strict error handler
   for the mbcs encoding (on Windows).
 * Remove PyUnicode_FSConverter() comment in unicodeobject.c to avoid
   inconsistency with unicodeobject.h.
parent f2e08b34
...@@ -380,13 +380,25 @@ used, passsing :func:`PyUnicode_FSConverter` as the conversion function: ...@@ -380,13 +380,25 @@ used, passsing :func:`PyUnicode_FSConverter` as the conversion function:
.. cfunction:: int PyUnicode_FSConverter(PyObject* obj, void* result) .. cfunction:: int PyUnicode_FSConverter(PyObject* obj, void* result)
Convert *obj* into *result*, using :cdata:`Py_FileSystemDefaultEncoding`, ParseTuple converter: encode :class:`str` objects to :class:`bytes` using
and the ``"surrogateescape"`` error handler. *result* must be a :cfunc:`PyUnicode_EncodeFSDefault`; :class:`bytes` objects are output as-is.
``PyObject*``, return a :func:`bytes` object which must be released if it *result* must be a :ctype:`PyBytesObject*` which must be released when it is
is no longer used. no longer used.
.. versionadded:: 3.1 .. versionadded:: 3.1
To decode file names during argument parsing, the ``"O&"`` converter should be
used, passing :func:`PyUnicode_FSDecoder` as the conversion function:
.. cfunction:: int PyUnicode_FSDecoder(PyObject* obj, void* result)
ParseTuple converter: decode :class:`bytes` objects to :class:`str` using
:cfunc:`PyUnicode_DecodeFSDefaultAndSize`; :class:`str` objects are output
as-is. *result* must be the address of a ``PyObject*`` variable; on success it
receives a new reference to a :class:`str` object, which must be released
when it is no longer used.
.. versionadded:: 3.2
.. cfunction:: PyObject* PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) .. cfunction:: PyObject* PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
Decode a null-terminated string using :cdata:`Py_FileSystemDefaultEncoding` Decode a null-terminated string using :cdata:`Py_FileSystemDefaultEncoding`
......
...@@ -200,6 +200,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; ...@@ -200,6 +200,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter # define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
# define PyUnicode_FSDecoder PyUnicodeUCS2_FSDecoder
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
...@@ -300,6 +301,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; ...@@ -300,6 +301,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter # define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
# define PyUnicode_FSDecoder PyUnicodeUCS4_FSDecoder
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
# define PyUnicode_GetSize PyUnicodeUCS4_GetSize # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
...@@ -1239,12 +1241,16 @@ PyAPI_FUNC(int) PyUnicode_EncodeDecimal( ...@@ -1239,12 +1241,16 @@ PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
/* --- File system encoding ---------------------------------------------- */ /* --- File system encoding ---------------------------------------------- */
/* ParseTuple converter which converts a Unicode object into the file /* ParseTuple converter: encode str objects to bytes using
system encoding as a bytes object, using the "surrogateescape" error PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
handler; bytes objects are output as-is. */
PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*); PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
/* ParseTuple converter: decode bytes objects to unicode using
PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
/* Decode a null-terminated string using Py_FileSystemDefaultEncoding /* Decode a null-terminated string using Py_FileSystemDefaultEncoding
and the "surrogateescape" error handler. and the "surrogateescape" error handler.
......
...@@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 2? ...@@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 2?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #9542: Create PyUnicode_FSDecoder() function, a ParseTuple converter:
decode bytes objects to unicode using PyUnicode_DecodeFSDefaultAndSize();
str objects are output as-is.
- Issue #9203: Computed gotos are now enabled by default on supported - Issue #9203: Computed gotos are now enabled by default on supported
compilers (which are detected by the configure script). They can still compilers (which are detected by the configure script). They can still
be disable selectively by specifying --without-computed-gotos. be disable selectively by specifying --without-computed-gotos.
......
...@@ -1652,9 +1652,6 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) ...@@ -1652,9 +1652,6 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
} }
} }
/* Convert the argument to a bytes object, according to the file
system encoding. The addr param must be a PyObject**.
This is designed to be used with "O&" in PyArg_Parse APIs. */
int int
PyUnicode_FSConverter(PyObject* arg, void* addr) PyUnicode_FSConverter(PyObject* arg, void* addr)
...@@ -1696,6 +1693,47 @@ PyUnicode_FSConverter(PyObject* arg, void* addr) ...@@ -1696,6 +1693,47 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
} }
/* ParseTuple "O&" converter: decode bytes-like objects to str using
   PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is.

   arg  -- the object to convert, or NULL when called again to release a
           previously stored result (the function returns
           Py_CLEANUP_SUPPORTED on success to request that call).
   addr -- must be a PyObject**; on success it receives a new reference to
           a str object.

   Returns Py_CLEANUP_SUPPORTED on success, 1 after a cleanup call, and 0
   with an exception set on failure (*addr is left untouched on failure). */
int
PyUnicode_FSDecoder(PyObject* arg, void* addr)
{
    PyObject *output = NULL;
    Py_ssize_t size;

    if (arg == NULL) {
        /* Cleanup call: drop the reference stored by an earlier successful
           conversion and reset the caller's variable so that it does not
           keep a dangling pointer. */
        Py_DECREF(*(PyObject**)addr);
        *(PyObject**)addr = NULL;
        return 1;
    }

    if (PyUnicode_Check(arg)) {
        /* Already a str: hand out a new reference to the same object. */
        output = arg;
        Py_INCREF(output);
    }
    else {
        /* Anything else must be convertible to bytes; 'arg' now refers to
           a new bytes object owned by this function. */
        arg = PyBytes_FromObject(arg);
        if (!arg)
            return 0;
        output = PyUnicode_DecodeFSDefaultAndSize(PyBytes_AS_STRING(arg),
                                                  PyBytes_GET_SIZE(arg));
        Py_DECREF(arg);
        if (!output)
            return 0;
        if (!PyUnicode_Check(output)) {
            Py_DECREF(output);
            PyErr_SetString(PyExc_TypeError, "decoder failed to return unicode");
            return 0;
        }
    }

    /* Reject strings with an embedded NUL: the length seen by a
       NUL-terminated scan would differ from the real length. */
    size = PyUnicode_GET_SIZE(output);
    if (size != Py_UNICODE_strlen(PyUnicode_AS_UNICODE(output))) {
        PyErr_SetString(PyExc_TypeError, "embedded NUL character");
        Py_DECREF(output);
        return 0;
    }

    *(PyObject**)addr = output;
    return Py_CLEANUP_SUPPORTED;
}
char* char*
_PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize) _PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment