Commit 011e8420 authored by Martin v. Löwis's avatar Martin v. Löwis

Issue #5915: Implement PEP 383, Non-decodable Bytes in

System Character Interfaces.
parent 93f65a17
......@@ -322,6 +322,8 @@ and implemented by all standard Python codecs:
| ``'backslashreplace'`` | Replace with backslashed escape sequences |
| | (only for encoding). |
+-------------------------+-----------------------------------------------+
| ``'utf8b'`` | Replace byte with surrogate U+DCxx. |
+-------------------------+-----------------------------------------------+
In addition, the following error handlers are specific to a single codec:
......@@ -333,7 +335,7 @@ In addition, the following error handlers are specific to a single codec:
+------------------+---------+--------------------------------------------+
.. versionadded:: 3.1
The ``'surrogates'`` error handler.
The ``'utf8b'`` and ``'surrogates'`` error handlers.
The set of allowed values can be extended via :meth:`register_error`.
......
......@@ -51,6 +51,30 @@ the :mod:`os` module, but using them is of course a threat to portability!
``'ce'``, ``'java'``.
.. _os-filenames:
File Names, Command Line Arguments, and Environment Variables
-------------------------------------------------------------
In Python, file names, command line arguments, and environment
variables are represented using the string type. On some systems,
decoding these strings to and from bytes is necessary before passing
them to the operating system. Python uses the file system encoding to
perform this conversion (see :func:`sys.getfilesystemencoding`).
.. versionchanged:: 3.1
On some systems, conversion using the file system encoding may
fail. In this case, Python uses the ``utf8b`` encoding error
handler, which means that each undecodable byte is replaced by the
lone surrogate code point U+DCxx (where xx is the value of the byte)
on decoding, and these surrogates are translated back to the original
bytes on encoding.
The file system encoding must guarantee that all bytes below 128 are
decoded successfully. If the file system encoding fails to provide this
guarantee, API functions may raise UnicodeErrors.
.. _os-procinfo:
Process Parameters
......@@ -688,12 +712,8 @@ Files and Directories
.. function:: getcwd()
Return a string representing the current working directory. On Unix
platforms, this function may raise :exc:`UnicodeDecodeError` if the name of
the current directory is not decodable in the file system encoding. Use
:func:`getcwdb` if you need the call to never fail. Availability: Unix,
Windows.
Return a string representing the current working directory.
Availability: Unix, Windows.
.. function:: getcwdb()
......@@ -800,10 +820,8 @@ Files and Directories
entries ``'.'`` and ``'..'`` even if they are present in the directory.
Availability: Unix, Windows.
This function can be called with a bytes or string argument. In the bytes
case, all filenames will be listed as returned by the underlying API. In the
string case, filenames will be decoded using the file system encoding, and
skipped if a decoding error occurs.
This function can be called with a bytes or string argument, and returns
filenames of the same datatype.
.. function:: lstat(path)
......
......@@ -198,6 +198,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
......@@ -296,6 +297,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
......@@ -693,25 +695,6 @@ PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
PyObject *unicode,
const char *errors);
/* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
If the encoding is supported by one of the built-in codecs (i.e., UTF-8,
UTF-16, UTF-32, Latin-1 or MBCS), otherwise fallback to UTF-8 and replace
invalid characters with '?'.
The function is intended to be used for paths and file names only
during bootstrapping process where the codecs are not set up.
*/
PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
const char *s /* encoded string */
);
PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
const char *s, /* encoded string */
Py_ssize_t size /* size */
);
/* Returns a pointer to the default encoding (normally, UTF-8) of the
Unicode object unicode and the size of the encoded representation
in bytes stored in *size.
......@@ -1252,6 +1235,33 @@ PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
const char *errors /* error handling */
);
/* --- File system encoding ---------------------------------------------- */
/* ParseTuple converter which converts a Unicode object into the file
system encoding, using the PEP 383 error handler; bytes objects are
output as-is. */
PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
if that encoding is supported by one of the built-in codecs (i.e., UTF-8,
UTF-16, UTF-32, Latin-1 or MBCS); otherwise fall back to UTF-8 and replace
invalid characters with '?'.
The function is intended to be used for paths and file names only
during the bootstrapping process, where the codecs are not yet set up.
*/
PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
const char *s /* encoded string */
);
PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
const char *s, /* encoded string */
Py_ssize_t size /* size */
);
/* --- Methods & Slots ----------------------------------------------------
These are capable of handling Unicode objects and strings on input
......
......@@ -1516,6 +1516,34 @@ class TypesTest(unittest.TestCase):
self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
# Round-trip tests for the 'utf8b' error handler: each test decodes bytes that
# the codec cannot decode and expects U+DCxx surrogates, then encodes those
# surrogates and expects the original bytes back.
class Utf8bTest(unittest.TestCase):
def test_utf8(self):
# Bad byte
self.assertEqual(b"foo\x80bar".decode("utf-8", "utf8b"),
"foo\udc80bar")
self.assertEqual("foo\udc80bar".encode("utf-8", "utf8b"),
b"foo\x80bar")
# bad-utf-8 encoded surrogate
# Every byte of the sequence is expected to become its own U+DCxx escape.
self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "utf8b"),
"\udced\udcb0\udc80")
self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "utf8b"),
b"\xed\xb0\x80")
def test_ascii(self):
# bad byte
self.assertEqual(b"foo\x80bar".decode("ascii", "utf8b"),
"foo\udc80bar")
self.assertEqual("foo\udc80bar".encode("ascii", "utf8b"),
b"foo\x80bar")
def test_charmap(self):
# bad byte: \xa5 is unmapped in iso-8859-3
self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "utf8b"),
"foo\udca5bar")
self.assertEqual("foo\udca5bar".encode("iso-8859-3", "utf8b"),
b"foo\xa5bar")
def test_main():
support.run_unittest(
......@@ -1543,6 +1571,7 @@ def test_main():
CharmapTest,
WithStmtTest,
TypesTest,
Utf8bTest,
)
......
......@@ -7,6 +7,7 @@ import errno
import unittest
import warnings
import sys
import shutil
from test import support
# Tests creating TESTFN
......@@ -698,9 +699,44 @@ if sys.platform != 'win32':
self.assertRaises(os.error, os.setregid, 0, 0)
self.assertRaises(OverflowError, os.setregid, 1<<32, 0)
self.assertRaises(OverflowError, os.setregid, 0, 1<<32)
# Creates files from raw byte names and checks that the str names obtained by
# decoding them with UTF-8 + 'utf8b' can be listed, opened and stat'ed.
class Pep383Tests(unittest.TestCase):
# The first name contains a bare 0xF6 byte (not valid UTF-8); the second is
# the UTF-8 encoding of the same text.
filenames = [b'foo\xf6bar', 'foo\xf6bar'.encode("utf-8")]
def setUp(self):
# Remember the current file system encoding so tearDown can restore it,
# then force UTF-8 for the duration of the test.
self.fsencoding = sys.getfilesystemencoding()
sys.setfilesystemencoding("utf-8")
self.dir = support.TESTFN
self.bdir = self.dir.encode("utf-8", "utf8b")
os.mkdir(self.dir)
self.unicodefn = []
for fn in self.filenames:
# Create the file through its bytes path, then record the str form
# that decoding with 'utf8b' produces for it.
f = open(os.path.join(self.bdir, fn), "w")
f.close()
self.unicodefn.append(fn.decode("utf-8", "utf8b"))
def tearDown(self):
shutil.rmtree(self.dir)
sys.setfilesystemencoding(self.fsencoding)
def test_listdir(self):
# listdir() with a str argument must report exactly the decoded names.
expected = set(self.unicodefn)
found = set(os.listdir(support.TESTFN))
self.assertEquals(found, expected)
def test_open(self):
# Opening by the decoded str name must succeed for every file.
for fn in self.unicodefn:
f = open(os.path.join(self.dir, fn))
f.close()
def test_stat(self):
# stat() by the decoded str name must not raise.
for fn in self.unicodefn:
os.stat(os.path.join(self.dir, fn))
else:
class PosixUidGidTests(unittest.TestCase):
pass
class Pep383Tests(unittest.TestCase):
pass
def test_main():
support.run_unittest(
......@@ -714,7 +750,8 @@ def test_main():
ExecTests,
Win32ErrorTests,
TestInvalidFD,
PosixUidGidTests
PosixUidGidTests,
Pep383Tests
)
if __name__ == "__main__":
......
......@@ -12,6 +12,8 @@ What's New in Python 3.1 beta 1?
Core and Builtins
-----------------
- Implement PEP 383, Non-decodable Bytes in System Character Interfaces.
- Issue #5890: in subclasses of 'property' the __doc__ attribute was
shadowed by classtype's, even if it was None. property now
inserts the __doc__ into the subclass instance __dict__.
......
......@@ -245,7 +245,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
return -1;
stringobj = PyUnicode_AsEncodedString(
u, Py_FileSystemDefaultEncoding, NULL);
u, Py_FileSystemDefaultEncoding, "utf8b");
Py_DECREF(u);
if (stringobj == NULL)
return -1;
......
......@@ -493,12 +493,14 @@ convertenviron(void)
char *p = strchr(*e, '=');
if (p == NULL)
continue;
k = PyUnicode_FromStringAndSize(*e, (int)(p-*e));
k = PyUnicode_Decode(*e, (int)(p-*e),
Py_FileSystemDefaultEncoding, "utf8b");
if (k == NULL) {
PyErr_Clear();
continue;
}
v = PyUnicode_FromString(p+1);
v = PyUnicode_Decode(p+1, strlen(p+1),
Py_FileSystemDefaultEncoding, "utf8b");
if (v == NULL) {
PyErr_Clear();
Py_DECREF(k);
......@@ -534,6 +536,37 @@ convertenviron(void)
return d;
}
/* Return a pointer to the character data of o, which must be a bytes or
   bytearray object.  For a bytearray, a buffer export is additionally
   requested when lock is non-zero. */
static char*
bytes2str(PyObject* o, int lock)
{
	if (PyBytes_Check(o))
		return PyBytes_AsString(o);

	if (!PyByteArray_Check(o)) {
		/* The FS converter should have verified that this
		   is either bytes or bytearray, so anything else is fatal. */
		Py_FatalError("bad object passed to bytes2str");
		/* not reached. */
		return "";
	}

	/* On a bytearray, the buffer request should not fail; if it does,
	   the error is flagged but the data pointer is still returned. */
	if (lock && PyObject_GetBuffer(o, NULL, 0) < 0)
		PyErr_BadInternalCall();
	return PyByteArray_AsString(o);
}
/* Release the buffer export taken on a bytearray (see bytes2str with
   lock != 0) and drop the reference to o.  For any other object only
   the reference is dropped. */
static void
release_bytes(PyObject* o)
{
	if (PyByteArray_Check(o))
		/* The slot's signature is (exporter, view): pass the object
		   itself as exporter instead of NULL, so the implementation
		   has a valid object to operate on.  No view was filled in
		   when the export was requested, hence NULL for the view. */
		o->ob_type->tp_as_buffer->bf_releasebuffer(o, NULL);
	Py_DECREF(o);
}
/* Set a POSIX-specific error from errno, and return NULL */
......@@ -558,10 +591,11 @@ posix_error_with_unicode_filename(Py_UNICODE* name)
/* Set OSError from errno with a file name attached, release the file name
   argument, and return whatever PyErr_SetFromErrnoWithFilename returned.
   The lines below show both variants of this function: the one taking a
   char* that is freed with PyMem_Free, and the one taking the converted
   path object, which is read via bytes2str (without locking) and released
   via release_bytes. */
static PyObject *
posix_error_with_allocated_filename(char* name)
posix_error_with_allocated_filename(PyObject* name)
{
PyObject *rc = PyErr_SetFromErrnoWithFilename(PyExc_OSError, name);
PyMem_Free(name);
PyObject *rc = PyErr_SetFromErrnoWithFilename(PyExc_OSError,
bytes2str(name, 0));
release_bytes(name);
return rc;
}
......@@ -728,17 +762,19 @@ unicode_file_names(void)
/* Helper for wrappers around int func(const char *path).
   Parses one path argument from args according to format, calls func on it
   between Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS, and returns None.
   If func returns a negative value, the error is raised through
   posix_error_with_allocated_filename, which also releases the path.
   The lines below show both the char*-based and the
   PyUnicode_FSConverter-based variants of the body. */
static PyObject *
posix_1str(PyObject *args, char *format, int (*func)(const char*))
{
char *path1 = NULL;
PyObject *opath1 = NULL;
char *path1;
int res;
if (!PyArg_ParseTuple(args, format,
Py_FileSystemDefaultEncoding, &path1))
PyUnicode_FSConverter, &opath1))
return NULL;
/* lock=1: see bytes2str for what this does on a bytearray argument. */
path1 = bytes2str(opath1, 1);
Py_BEGIN_ALLOW_THREADS
res = (*func)(path1);
Py_END_ALLOW_THREADS
if (res < 0)
return posix_error_with_allocated_filename(path1);
PyMem_Free(path1);
return posix_error_with_allocated_filename(opath1);
release_bytes(opath1);
Py_INCREF(Py_None);
return Py_None;
}
......@@ -748,17 +784,20 @@ posix_2str(PyObject *args,
char *format,
int (*func)(const char *, const char *))
{
char *path1 = NULL, *path2 = NULL;
PyObject *opath1, *opath2;
char *path1, *path2;
int res;
if (!PyArg_ParseTuple(args, format,
Py_FileSystemDefaultEncoding, &path1,
Py_FileSystemDefaultEncoding, &path2))
PyUnicode_FSConverter, &opath1,
PyUnicode_FSConverter, &opath2))
return NULL;
path1 = bytes2str(opath1, 1);
path2 = bytes2str(opath2, 1);
Py_BEGIN_ALLOW_THREADS
res = (*func)(path1, path2);
Py_END_ALLOW_THREADS
PyMem_Free(path1);
PyMem_Free(path2);
release_bytes(opath1);
release_bytes(opath2);
if (res != 0)
/* XXX how to report both path1 and path2??? */
return posix_error();
......@@ -1560,8 +1599,8 @@ posix_do_stat(PyObject *self, PyObject *args,
int (*wstatfunc)(const Py_UNICODE *, STRUCT_STAT *))
{
STRUCT_STAT st;
char *path = NULL; /* pass this to stat; do not free() it */
char *pathfree = NULL; /* this memory must be free'd */
PyObject *opath;
char *path;
int res;
PyObject *result;
......@@ -1590,25 +1629,24 @@ posix_do_stat(PyObject *self, PyObject *args,
#endif
if (!PyArg_ParseTuple(args, format,
Py_FileSystemDefaultEncoding, &path))
PyUnicode_FSConverter, &opath))
return NULL;
pathfree = path;
path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = (*statfunc)(path, &st);
Py_END_ALLOW_THREADS
if (res != 0) {
#ifdef MS_WINDOWS
result = win32_error("stat", pathfree);
result = win32_error("stat", path);
#else
result = posix_error_with_filename(pathfree);
result = posix_error_with_filename(path);
#endif
}
else
result = _pystat_fromstructstat(&st);
PyMem_Free(pathfree);
release_bytes(opath);
return result;
}
......@@ -1625,6 +1663,7 @@ existence, or the inclusive-OR of R_OK, W_OK, and X_OK.");
static PyObject *
posix_access(PyObject *self, PyObject *args)
{
PyObject *opath;
char *path;
int mode;
......@@ -1644,13 +1683,14 @@ posix_access(PyObject *self, PyObject *args)
are also valid. */
PyErr_Clear();
}
if (!PyArg_ParseTuple(args, "eti:access",
Py_FileSystemDefaultEncoding, &path, &mode))
if (!PyArg_ParseTuple(args, "O&i:access",
PyUnicode_FSConverter, &opath, &mode))
return 0;
path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
attr = GetFileAttributesA(path);
Py_END_ALLOW_THREADS
PyMem_Free(path);
release_bytes(opath);
finish:
if (attr == 0xFFFFFFFF)
/* File does not exist, or cannot read attributes */
......@@ -1663,13 +1703,14 @@ finish:
|| (attr & FILE_ATTRIBUTE_DIRECTORY));
#else
int res;
if (!PyArg_ParseTuple(args, "eti:access",
Py_FileSystemDefaultEncoding, &path, &mode))
if (!PyArg_ParseTuple(args, "O&i:access",
PyUnicode_FSConverter, &opath, &mode))
return NULL;
path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = access(path, mode);
Py_END_ALLOW_THREADS
PyMem_Free(path);
release_bytes(opath);
return PyBool_FromLong(res == 0);
#endif
}
......@@ -1750,11 +1791,11 @@ posix_chdir(PyObject *self, PyObject *args)
#ifdef MS_WINDOWS
return win32_1str(args, "chdir", "y:chdir", win32_chdir, "U:chdir", win32_wchdir);
#elif defined(PYOS_OS2) && defined(PYCC_GCC)
return posix_1str(args, "et:chdir", _chdir2);
return posix_1str(args, "O&:chdir", _chdir2);
#elif defined(__VMS)
return posix_1str(args, "et:chdir", (int (*)(const char *))chdir);
return posix_1str(args, "O&:chdir", (int (*)(const char *))chdir);
#else
return posix_1str(args, "et:chdir", chdir);
return posix_1str(args, "O&:chdir", chdir);
#endif
}
......@@ -1779,6 +1820,7 @@ Change the access permissions of a file.");
static PyObject *
posix_chmod(PyObject *self, PyObject *args)
{
PyObject *opath = NULL;
char *path = NULL;
int i;
int res;
......@@ -1809,9 +1851,10 @@ posix_chmod(PyObject *self, PyObject *args)
are also valid. */
PyErr_Clear();
}
if (!PyArg_ParseTuple(args, "eti:chmod", Py_FileSystemDefaultEncoding,
&path, &i))
if (!PyArg_ParseTuple(args, "O&i:chmod", PyUnicode_FSConverter,
&opath, &i))
return NULL;
path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
attr = GetFileAttributesA(path);
if (attr != 0xFFFFFFFF) {
......@@ -1826,22 +1869,23 @@ posix_chmod(PyObject *self, PyObject *args)
Py_END_ALLOW_THREADS
if (!res) {
win32_error("chmod", path);
PyMem_Free(path);
release_bytes(opath);
return NULL;
}
PyMem_Free(path);
release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
#else /* Py_WIN_WIDE_FILENAMES */
if (!PyArg_ParseTuple(args, "eti:chmod", Py_FileSystemDefaultEncoding,
&path, &i))
if (!PyArg_ParseTuple(args, "O&i:chmod", PyUnicode_FSConverter,
&opath, &i))
return NULL;
path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = chmod(path, i);
Py_END_ALLOW_THREADS
if (res < 0)
return posix_error_with_allocated_filename(path);
PyMem_Free(path);
return posix_error_with_allocated_filename(opath);
release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
#endif
......@@ -1877,18 +1921,20 @@ affects the link itself rather than the target.");
/* lchmod(path, mode): parse a path and an int mode, call lchmod() on them
   between Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS, and return None.
   A negative result raises via posix_error_with_allocated_filename, which
   also releases the path.  The lines below show both the char*-based and
   the PyUnicode_FSConverter-based variants of the body. */
static PyObject *
posix_lchmod(PyObject *self, PyObject *args)
{
char *path = NULL;
PyObject *opath;
char *path;
int i;
int res;
if (!PyArg_ParseTuple(args, "eti:lchmod", Py_FileSystemDefaultEncoding,
&path, &i))
if (!PyArg_ParseTuple(args, "O&i:lchmod", PyUnicode_FSConverter,
&opath, &i))
return NULL;
/* The terminating semicolon was missing on this statement. */
path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = lchmod(path, i);
Py_END_ALLOW_THREADS
if (res < 0)
return posix_error_with_allocated_filename(path);
PyMem_Free(path);
return posix_error_with_allocated_filename(opath);
release_bytes(opath);
Py_RETURN_NONE;
}
#endif /* HAVE_LCHMOD */
......@@ -1902,18 +1948,20 @@ Set file flags.");
/* chflags(path, flags): parse a path and an unsigned long flags value, call
   chflags() on them between Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS,
   and return None.  A negative result raises via
   posix_error_with_allocated_filename, which also releases the path.
   The lines below show both the char*-based and the
   PyUnicode_FSConverter-based variants of the body. */
static PyObject *
posix_chflags(PyObject *self, PyObject *args)
{
PyObject *opath;
char *path;
unsigned long flags;
int res;
if (!PyArg_ParseTuple(args, "etk:chflags",
Py_FileSystemDefaultEncoding, &path, &flags))
if (!PyArg_ParseTuple(args, "O&k:chflags",
PyUnicode_FSConverter, &opath, &flags))
return NULL;
path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = chflags(path, flags);
Py_END_ALLOW_THREADS
if (res < 0)
return posix_error_with_allocated_filename(path);
PyMem_Free(path);
return posix_error_with_allocated_filename(opath);
release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
}
......@@ -1928,18 +1976,20 @@ This function will not follow symbolic links.");
/* lchflags(path, flags): parse a path and an unsigned long flags value,
   call lchflags() on them between Py_BEGIN_ALLOW_THREADS /
   Py_END_ALLOW_THREADS, and return None.  A negative result raises via
   posix_error_with_allocated_filename, which also releases the path.
   The lines below show both the char*-based and the
   PyUnicode_FSConverter-based variants of the body. */
static PyObject *
posix_lchflags(PyObject *self, PyObject *args)
{
PyObject *opath;
char *path;
unsigned long flags;
int res;
if (!PyArg_ParseTuple(args, "etk:lchflags",
Py_FileSystemDefaultEncoding, &path, &flags))
/* The O& converter must store into opath (a PyObject *); it previously
   received &path, leaving opath uninitialized for the calls below. */
if (!PyArg_ParseTuple(args, "O&k:lchflags",
PyUnicode_FSConverter, &opath, &flags))
return NULL;
path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = lchflags(path, flags);
Py_END_ALLOW_THREADS
if (res < 0)
return posix_error_with_allocated_filename(path);
PyMem_Free(path);
return posix_error_with_allocated_filename(opath);
release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
}
......@@ -1953,7 +2003,7 @@ Change root directory to path.");
/* chroot(path): delegates to posix_1str with chroot() as the callback.
   Both format strings are shown: "et" (encoded string) and "O&"
   (converter-based). */
static PyObject *
posix_chroot(PyObject *self, PyObject *args)
{
return posix_1str(args, "et:chroot", chroot);
return posix_1str(args, "O&:chroot", chroot);
}
#endif
......@@ -1996,19 +2046,21 @@ Change the owner and group id of path to the numeric uid and gid.");
/* chown(path, uid, gid): parse a path and two long ids, call chown() with
   the ids cast to uid_t / gid_t between Py_BEGIN_ALLOW_THREADS /
   Py_END_ALLOW_THREADS, and return None.  A negative result raises via
   posix_error_with_allocated_filename, which also releases the path.
   The lines below show both the char*-based and the
   PyUnicode_FSConverter-based variants of the body. */
static PyObject *
posix_chown(PyObject *self, PyObject *args)
{
char *path = NULL;
PyObject *opath;
char *path;
long uid, gid;
int res;
if (!PyArg_ParseTuple(args, "etll:chown",
Py_FileSystemDefaultEncoding, &path,
if (!PyArg_ParseTuple(args, "O&ll:chown",
PyUnicode_FSConverter, &opath,
&uid, &gid))
return NULL;
path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = chown(path, (uid_t) uid, (gid_t) gid);
Py_END_ALLOW_THREADS
if (res < 0)
return posix_error_with_allocated_filename(path);
PyMem_Free(path);
return posix_error_with_allocated_filename(opath);
release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
}
......@@ -2045,19 +2097,21 @@ This function will not follow symbolic links.");
/* lchown(path, uid, gid): parse a path and two int ids, call lchown() with
   the ids cast to uid_t / gid_t between Py_BEGIN_ALLOW_THREADS /
   Py_END_ALLOW_THREADS, and return None.  A negative result raises via
   posix_error_with_allocated_filename, which also releases the path.
   NOTE(review): the ids are parsed as int ("ii") here but as long ("ll")
   in posix_chown -- confirm whether the difference is intended.
   The lines below show both the char*-based and the
   PyUnicode_FSConverter-based variants of the body. */
static PyObject *
posix_lchown(PyObject *self, PyObject *args)
{
char *path = NULL;
PyObject *opath;
char *path;
int uid, gid;
int res;
if (!PyArg_ParseTuple(args, "etii:lchown",
Py_FileSystemDefaultEncoding, &path,
if (!PyArg_ParseTuple(args, "O&ii:lchown",
PyUnicode_FSConverter, &opath,
&uid, &gid))
return NULL;
path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = lchown(path, (uid_t) uid, (gid_t) gid);
Py_END_ALLOW_THREADS
if (res < 0)
return posix_error_with_allocated_filename(path);
PyMem_Free(path);
return posix_error_with_allocated_filename(opath);
release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
}
......@@ -2113,7 +2167,7 @@ posix_getcwd(int use_bytes)
return posix_error();
if (use_bytes)
return PyBytes_FromStringAndSize(buf, strlen(buf));
return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"strict");
return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"utf8b");
}
PyDoc_STRVAR(posix_getcwd__doc__,
......@@ -2146,7 +2200,7 @@ Create a hard link to a file.");
/* link(src, dst): delegates to posix_2str with link() as the callback.
   Both format strings are shown: "etet" (encoded strings) and "O&O&"
   (converter-based). */
static PyObject *
posix_link(PyObject *self, PyObject *args)
{
return posix_2str(args, "etet:link", link);
return posix_2str(args, "O&O&:link", link);
}
#endif /* HAVE_LINK */
......@@ -2171,6 +2225,7 @@ posix_listdir(PyObject *self, PyObject *args)
HANDLE hFindFile;
BOOL result;
WIN32_FIND_DATA FileData;
PyObject *opath;
char namebuf[MAX_PATH+5]; /* Overallocate for \\*.*\0 */
char *bufptr = namebuf;
Py_ssize_t len = sizeof(namebuf)-5; /* only claim to have space for MAX_PATH */
......@@ -2260,9 +2315,16 @@ posix_listdir(PyObject *self, PyObject *args)
}
#endif
if (!PyArg_ParseTuple(args, "et#:listdir",
Py_FileSystemDefaultEncoding, &bufptr, &len))
if (!PyArg_ParseTuple(args, "O&:listdir",
PyUnicode_FSConverter, &opath))
return NULL;
if (PyObject_Size(opath)+1 > MAX_PATH) {
PyErr_SetString(PyExc_ValueError, "path too long");
Py_DECREF(opath);
return NULL;
}
strcpy(namebuf, bytes2str(opath, 0));
len = PyObject_Size(opath);
if (len > 0) {
char ch = namebuf[len-1];
if (ch != SEP && ch != ALTSEP && ch != ':')
......@@ -2324,6 +2386,7 @@ posix_listdir(PyObject *self, PyObject *args)
#ifndef MAX_PATH
#define MAX_PATH CCHMAXPATH
#endif
PyObject *oname;
char *name, *pt;
Py_ssize_t len;
PyObject *d, *v;
......@@ -2333,11 +2396,13 @@ posix_listdir(PyObject *self, PyObject *args)
FILEFINDBUF3 ep;
APIRET rc;
if (!PyArg_ParseTuple(args, "et#:listdir",
Py_FileSystemDefaultEncoding, &name, &len))
if (!PyArg_ParseTuple(args, "O&:listdir",
PyUnicode_FSConverter, &oname))
return NULL;
name = bytes2str(oname);
len = PyObject_Size(oname);
if (len >= MAX_PATH) {
PyMem_Free(name);
release_bytes(oname);
PyErr_SetString(PyExc_ValueError, "path too long");
return NULL;
}
......@@ -2350,7 +2415,7 @@ posix_listdir(PyObject *self, PyObject *args)
strcpy(namebuf + len, "*.*");
if ((d = PyList_New(0)) == NULL) {
PyMem_Free(name);
release_bytes(oname);
return NULL;
}
......@@ -2363,7 +2428,7 @@ posix_listdir(PyObject *self, PyObject *args)
if (rc != NO_ERROR) {
errno = ENOENT;
return posix_error_with_allocated_filename(name);
return posix_error_with_allocated_filename(oname);
}
if (srchcnt > 0) { /* If Directory is NOT Totally Empty, */
......@@ -2393,11 +2458,11 @@ posix_listdir(PyObject *self, PyObject *args)
} while (DosFindNext(hdir, &ep, sizeof(ep), &srchcnt) == NO_ERROR && srchcnt > 0);
}
PyMem_Free(name);
release_bytes(oname);
return d;
#else
char *name = NULL;
PyObject *oname;
char *name;
PyObject *d, *v;
DIR *dirp;
struct dirent *ep;
......@@ -2408,14 +2473,15 @@ posix_listdir(PyObject *self, PyObject *args)
arg_is_unicode = 0;
PyErr_Clear();
}
if (!PyArg_ParseTuple(args, "et:listdir", Py_FileSystemDefaultEncoding, &name))
if (!PyArg_ParseTuple(args, "O&:listdir", PyUnicode_FSConverter, &oname))
return NULL;
name = bytes2str(oname, 1);
if ((dirp = opendir(name)) == NULL) {
return posix_error_with_allocated_filename(name);
return posix_error_with_allocated_filename(oname);
}
if ((d = PyList_New(0)) == NULL) {
closedir(dirp);
PyMem_Free(name);
release_bytes(oname);
return NULL;
}
for (;;) {
......@@ -2429,7 +2495,7 @@ posix_listdir(PyObject *self, PyObject *args)
} else {
closedir(dirp);
Py_DECREF(d);
return posix_error_with_allocated_filename(name);
return posix_error_with_allocated_filename(oname);
}
}
if (ep->d_name[0] == '.' &&
......@@ -2447,18 +2513,16 @@ posix_listdir(PyObject *self, PyObject *args)
w = PyUnicode_FromEncodedObject(v,
Py_FileSystemDefaultEncoding,
"strict");
if (w != NULL) {
Py_DECREF(v);
"utf8b");
Py_DECREF(v);
if (w != NULL)
v = w;
}
else {
/* Ignore undecodable filenames, as discussed
* in issue 3187. To include these,
* use getcwdb(). */
PyErr_Clear();
Py_DECREF(v);
continue;
/* Encoding failed to decode ASCII bytes.
Raise exception. */
Py_DECREF(d);
d = NULL;
break;
}
}
if (PyList_Append(d, v) != 0) {
......@@ -2470,7 +2534,7 @@ posix_listdir(PyObject *self, PyObject *args)
Py_DECREF(v);
}
closedir(dirp);
PyMem_Free(name);
release_bytes(oname);
return d;
......@@ -2482,10 +2546,8 @@ posix_listdir(PyObject *self, PyObject *args)
static PyObject *
posix__getfullpathname(PyObject *self, PyObject *args)
{
/* assume encoded strings won't more than double no of chars */
char inbuf[MAX_PATH*2];
char *inbufp = inbuf;
Py_ssize_t insize = sizeof(inbuf);
PyObject *opath;
char *path;
char outbuf[MAX_PATH*2];
char *temp;
#ifdef Py_WIN_WIDE_FILENAMES
......@@ -2519,13 +2581,17 @@ posix__getfullpathname(PyObject *self, PyObject *args)
PyErr_Clear();
}
#endif
if (!PyArg_ParseTuple (args, "et#:_getfullpathname",
Py_FileSystemDefaultEncoding, &inbufp,
&insize))
if (!PyArg_ParseTuple (args, "O&:_getfullpathname",
PyUnicode_FSConverter, &opath))
return NULL;
if (!GetFullPathName(inbuf, sizeof(outbuf)/sizeof(outbuf[0]),
outbuf, &temp))
return win32_error("GetFullPathName", inbuf);
path = bytes2str(opath, 1);
if (!GetFullPathName(path, sizeof(outbuf)/sizeof(outbuf[0]),
outbuf, &temp)) {
win32_error("GetFullPathName", path);
release_bytes(opath);
return NULL;
}
release_bytes(opath);
if (PyUnicode_Check(PyTuple_GetItem(args, 0))) {
return PyUnicode_Decode(outbuf, strlen(outbuf),
Py_FileSystemDefaultEncoding, NULL);
......@@ -2542,7 +2608,8 @@ static PyObject *
posix_mkdir(PyObject *self, PyObject *args)
{
int res;
char *path = NULL;
PyObject *opath;
char *path;
int mode = 0777;
#ifdef Py_WIN_WIDE_FILENAMES
......@@ -2563,9 +2630,10 @@ posix_mkdir(PyObject *self, PyObject *args)
are also valid. */
PyErr_Clear();
}
if (!PyArg_ParseTuple(args, "et|i:mkdir",
Py_FileSystemDefaultEncoding, &path, &mode))
if (!PyArg_ParseTuple(args, "O&|i:mkdir",
PyUnicode_FSConverter, &opath, &mode))
return NULL;
path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
/* PyUnicode_AS_UNICODE OK without thread lock as
it is a simple dereference. */
......@@ -2573,17 +2641,18 @@ posix_mkdir(PyObject *self, PyObject *args)
Py_END_ALLOW_THREADS
if (!res) {
win32_error("mkdir", path);
PyMem_Free(path);
release_bytes(opath);
return NULL;
}
PyMem_Free(path);
release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
#else
if (!PyArg_ParseTuple(args, "et|i:mkdir",
Py_FileSystemDefaultEncoding, &path, &mode))
if (!PyArg_ParseTuple(args, "O&|i:mkdir",
PyUnicode_FSConverter, &opath, &mode))
return NULL;
path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
#if ( defined(__WATCOMC__) || defined(PYCC_VACPP) ) && !defined(__QNX__)
res = mkdir(path);
......@@ -2592,8 +2661,8 @@ posix_mkdir(PyObject *self, PyObject *args)
#endif
Py_END_ALLOW_THREADS
if (res < 0)
return posix_error_with_allocated_filename(path);
PyMem_Free(path);
return posix_error_with_allocated_filename(opath);
release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
#endif
......@@ -2685,7 +2754,7 @@ error:
Py_INCREF(Py_None);
return Py_None;
#else
return posix_2str(args, "etet:rename", rename);
return posix_2str(args, "O&O&:rename", rename);
#endif
}
......@@ -2700,7 +2769,7 @@ posix_rmdir(PyObject *self, PyObject *args)
#ifdef MS_WINDOWS
return win32_1str(args, "rmdir", "y:rmdir", RemoveDirectoryA, "U:rmdir", RemoveDirectoryW);
#else
return posix_1str(args, "et:rmdir", rmdir);
return posix_1str(args, "O&:rmdir", rmdir);
#endif
}
......@@ -2713,9 +2782,9 @@ static PyObject *
posix_stat(PyObject *self, PyObject *args)
{
#ifdef MS_WINDOWS
return posix_do_stat(self, args, "et:stat", STAT, "U:stat", win32_wstat);
return posix_do_stat(self, args, "O&:stat", STAT, "U:stat", win32_wstat);
#else
return posix_do_stat(self, args, "et:stat", STAT, NULL, NULL);
return posix_do_stat(self, args, "O&:stat", STAT, NULL, NULL);
#endif
}
......@@ -2781,7 +2850,7 @@ posix_unlink(PyObject *self, PyObject *args)
#ifdef MS_WINDOWS
return win32_1str(args, "remove", "y:remove", DeleteFileA, "U:remove", DeleteFileW);
#else
return posix_1str(args, "et:remove", unlink);
return posix_1str(args, "O&:remove", unlink);
#endif
}
......@@ -2853,7 +2922,8 @@ posix_utime(PyObject *self, PyObject *args)
PyObject *arg;
PyUnicodeObject *obwpath;
wchar_t *wpath = NULL;
char *apath = NULL;
PyObject *oapath;
char *apath;
HANDLE hFile;
long atimesec, mtimesec, ausec, musec;
FILETIME atime, mtime;
......@@ -2875,9 +2945,10 @@ posix_utime(PyObject *self, PyObject *args)
PyErr_Clear();
}
if (!wpath) {
if (!PyArg_ParseTuple(args, "etO:utime",
Py_FileSystemDefaultEncoding, &apath, &arg))
if (!PyArg_ParseTuple(args, "O&O:utime",
PyUnicode_FSConverter, &oapath, &arg))
return NULL;
apath = bytes2str(oapath, 1);
Py_BEGIN_ALLOW_THREADS
hFile = CreateFileA(apath, FILE_WRITE_ATTRIBUTES, 0,
NULL, OPEN_EXISTING,
......@@ -2885,10 +2956,10 @@ posix_utime(PyObject *self, PyObject *args)
Py_END_ALLOW_THREADS
if (hFile == INVALID_HANDLE_VALUE) {
win32_error("utime", apath);
PyMem_Free(apath);
release_bytes(oapath);
return NULL;
}
PyMem_Free(apath);
release_bytes(oapath);
}
if (arg == Py_None) {
......@@ -2929,7 +3000,8 @@ done:
return result;
#else /* Py_WIN_WIDE_FILENAMES */
char *path = NULL;
PyObject *opath;
char *path;
long atime, mtime, ausec, musec;
int res;
PyObject* arg;
......@@ -2952,9 +3024,10 @@ done:
#endif /* HAVE_UTIMES */
if (!PyArg_ParseTuple(args, "etO:utime",
Py_FileSystemDefaultEncoding, &path, &arg))
if (!PyArg_ParseTuple(args, "O&O:utime",
PyUnicode_FSConverter, &opath, &arg))
return NULL;
path = bytes2str(opath, 1);
if (arg == Py_None) {
/* optional time values not given */
Py_BEGIN_ALLOW_THREADS
......@@ -2964,18 +3037,18 @@ done:
else if (!PyTuple_Check(arg) || PyTuple_Size(arg) != 2) {
PyErr_SetString(PyExc_TypeError,
"utime() arg 2 must be a tuple (atime, mtime)");
PyMem_Free(path);
release_bytes(opath);
return NULL;
}
else {
if (extract_time(PyTuple_GET_ITEM(arg, 0),
&atime, &ausec) == -1) {
PyMem_Free(path);
release_bytes(opath);
return NULL;
}
if (extract_time(PyTuple_GET_ITEM(arg, 1),
&mtime, &musec) == -1) {
PyMem_Free(path);
release_bytes(opath);
return NULL;
}
ATIME = atime;
......@@ -2993,9 +3066,9 @@ done:
#endif /* HAVE_UTIMES */
}
if (res < 0) {
return posix_error_with_allocated_filename(path);
return posix_error_with_allocated_filename(opath);
}
PyMem_Free(path);
release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
#undef UTIME_ARG
......@@ -3030,6 +3103,22 @@ free_string_array(char **array, Py_ssize_t count)
PyMem_Free(array[i]);
PyMem_DEL(array);
}
/* Convert o with PyUnicode_FSConverter and store a freshly allocated copy
   of the resulting byte string in *out.
   The copy is made with PyMem_Malloc and covers size+1 bytes, i.e. the
   data plus the byte that follows it (the terminator).
   Returns 1 on success, in which case the caller owns *out and releases it
   with PyMem_Free (as free_string_array does).  Returns 0 if the
   conversion fails or if memory cannot be allocated; in the latter case
   MemoryError is set. */
int fsconvert_strdup(PyObject *o, char**out)
{
	PyObject *bytes;
	Py_ssize_t size;
	if (!PyUnicode_FSConverter(o, &bytes))
		return 0;
	size = PyObject_Size(bytes);
	*out = PyMem_Malloc(size+1);
	if (!*out) {
		/* Drop the converted object instead of leaking it, and make
		   sure the failure is reported as an exception. */
		Py_DECREF(bytes);
		PyErr_NoMemory();
		return 0;
	}
	/* Don't lock bytes, as we hold the GIL */
	memcpy(*out, bytes2str(bytes, 0), size+1);
	Py_DECREF(bytes);
	return 1;
}
#endif
......@@ -3044,6 +3133,7 @@ Execute an executable path with arguments, replacing current process.\n\
static PyObject *
posix_execv(PyObject *self, PyObject *args)
{
PyObject *opath;
char *path;
PyObject *argv;
char **argvlist;
......@@ -3053,10 +3143,11 @@ posix_execv(PyObject *self, PyObject *args)
/* execv has two arguments: (path, argv), where
argv is a list or tuple of strings. */
if (!PyArg_ParseTuple(args, "etO:execv",
Py_FileSystemDefaultEncoding,
&path, &argv))
if (!PyArg_ParseTuple(args, "O&O:execv",
PyUnicode_FSConverter,
&opath, &argv))
return NULL;
path = bytes2str(opath, 1);
if (PyList_Check(argv)) {
argc = PyList_Size(argv);
getitem = PyList_GetItem;
......@@ -3067,28 +3158,27 @@ posix_execv(PyObject *self, PyObject *args)
}
else {
PyErr_SetString(PyExc_TypeError, "execv() arg 2 must be a tuple or list");
PyMem_Free(path);
release_bytes(opath);
return NULL;
}
if (argc < 1) {
PyErr_SetString(PyExc_ValueError, "execv() arg 2 must not be empty");
PyMem_Free(path);
release_bytes(opath);
return NULL;
}
argvlist = PyMem_NEW(char *, argc+1);
if (argvlist == NULL) {
PyMem_Free(path);
release_bytes(opath);
return PyErr_NoMemory();
}
for (i = 0; i < argc; i++) {
if (!PyArg_Parse((*getitem)(argv, i), "et",
Py_FileSystemDefaultEncoding,
&argvlist[i])) {
if (!fsconvert_strdup((*getitem)(argv, i),
&argvlist[i])) {
free_string_array(argvlist, i);
PyErr_SetString(PyExc_TypeError,
"execv() arg 2 must contain only strings");
PyMem_Free(path);
release_bytes(opath);
return NULL;
}
......@@ -3100,7 +3190,7 @@ posix_execv(PyObject *self, PyObject *args)
/* If we get here it's definitely an error */
free_string_array(argvlist, argc);
PyMem_Free(path);
release_bytes(opath);
return posix_error();
}
......@@ -3116,6 +3206,7 @@ Execute a path with arguments and environment, replacing current process.\n\
static PyObject *
posix_execve(PyObject *self, PyObject *args)
{
PyObject *opath;
char *path;
PyObject *argv, *env;
char **argvlist;
......@@ -3129,10 +3220,11 @@ posix_execve(PyObject *self, PyObject *args)
argv is a list or tuple of strings and env is a dictionary
like posix.environ. */
if (!PyArg_ParseTuple(args, "etOO:execve",
Py_FileSystemDefaultEncoding,
&path, &argv, &env))
if (!PyArg_ParseTuple(args, "O&OO:execve",
PyUnicode_FSConverter,
&opath, &argv, &env))
return NULL;
path = bytes2str(opath, 1);
if (PyList_Check(argv)) {
argc = PyList_Size(argv);
getitem = PyList_GetItem;
......@@ -3158,10 +3250,8 @@ posix_execve(PyObject *self, PyObject *args)
goto fail_0;
}
for (i = 0; i < argc; i++) {
if (!PyArg_Parse((*getitem)(argv, i),
"et;execve() arg 2 must contain only strings",
Py_FileSystemDefaultEncoding,
&argvlist[i]))
if (!fsconvert_strdup((*getitem)(argv, i),
&argvlist[i]))
{
lastarg = i;
goto fail_1;
......@@ -3243,7 +3333,7 @@ posix_execve(PyObject *self, PyObject *args)
Py_XDECREF(vals);
Py_XDECREF(keys);
fail_0:
PyMem_Free(path);
release_bytes(opath);
return NULL;
}
#endif /* HAVE_EXECV */
......@@ -3261,6 +3351,7 @@ Execute the program 'path' in a new process.\n\
static PyObject *
posix_spawnv(PyObject *self, PyObject *args)
{
PyObject *opath;
char *path;
PyObject *argv;
char **argvlist;
......@@ -3272,10 +3363,11 @@ posix_spawnv(PyObject *self, PyObject *args)
/* spawnv has three arguments: (mode, path, argv), where
argv is a list or tuple of strings. */
if (!PyArg_ParseTuple(args, "ietO:spawnv", &mode,
Py_FileSystemDefaultEncoding,
&path, &argv))
if (!PyArg_ParseTuple(args, "iO&O:spawnv", &mode,
PyUnicode_FSConverter,
&opath, &argv))
return NULL;
path = bytes2str(opath, 1);
if (PyList_Check(argv)) {
argc = PyList_Size(argv);
getitem = PyList_GetItem;
......@@ -3287,24 +3379,23 @@ posix_spawnv(PyObject *self, PyObject *args)
else {
PyErr_SetString(PyExc_TypeError,
"spawnv() arg 2 must be a tuple or list");
PyMem_Free(path);
release_bytes(opath);
return NULL;
}
argvlist = PyMem_NEW(char *, argc+1);
if (argvlist == NULL) {
PyMem_Free(path);
release_bytes(opath);
return PyErr_NoMemory();
}
for (i = 0; i < argc; i++) {
if (!PyArg_Parse((*getitem)(argv, i), "et",
Py_FileSystemDefaultEncoding,
&argvlist[i])) {
if (!fsconvert_strdup((*getitem)(argv, i),
&argvlist[i])) {
free_string_array(argvlist, i);
PyErr_SetString(
PyExc_TypeError,
"spawnv() arg 2 must contain only strings");
PyMem_Free(path);
release_bytes(opath);
return NULL;
}
}
......@@ -3324,7 +3415,7 @@ posix_spawnv(PyObject *self, PyObject *args)
#endif
free_string_array(argvlist, argc);
PyMem_Free(path);
release_bytes(opath);
if (spawnval == -1)
return posix_error();
......@@ -3349,6 +3440,7 @@ Execute the program 'path' in a new process.\n\
static PyObject *
posix_spawnve(PyObject *self, PyObject *args)
{
PyObject *opath;
char *path;
PyObject *argv, *env;
char **argvlist;
......@@ -3364,10 +3456,11 @@ posix_spawnve(PyObject *self, PyObject *args)
argv is a list or tuple of strings and env is a dictionary
like posix.environ. */
if (!PyArg_ParseTuple(args, "ietOO:spawnve", &mode,
Py_FileSystemDefaultEncoding,
&path, &argv, &env))
if (!PyArg_ParseTuple(args, "iO&OO:spawnve", &mode,
PyUnicode_FSConverter,
&opath, &argv, &env))
return NULL;
path = bytes2str(opath, 1);
if (PyList_Check(argv)) {
argc = PyList_Size(argv);
getitem = PyList_GetItem;
......@@ -3393,10 +3486,8 @@ posix_spawnve(PyObject *self, PyObject *args)
goto fail_0;
}
for (i = 0; i < argc; i++) {
if (!PyArg_Parse((*getitem)(argv, i),
"et;spawnve() arg 2 must contain only strings",
Py_FileSystemDefaultEncoding,
&argvlist[i]))
if (!fsconvert_strdup((*getitem)(argv, i),
&argvlist[i]))
{
lastarg = i;
goto fail_1;
......@@ -3486,7 +3577,7 @@ posix_spawnve(PyObject *self, PyObject *args)
Py_XDECREF(vals);
Py_XDECREF(keys);
fail_0:
PyMem_Free(path);
release_bytes(opath);
return res;
}
......@@ -3504,6 +3595,7 @@ search path to find the file.\n\
static PyObject *
posix_spawnvp(PyObject *self, PyObject *args)
{
PyObject *opath;
char *path;
PyObject *argv;
char **argvlist;
......@@ -3514,10 +3606,11 @@ posix_spawnvp(PyObject *self, PyObject *args)
/* spawnvp has three arguments: (mode, path, argv), where
argv is a list or tuple of strings. */
if (!PyArg_ParseTuple(args, "ietO:spawnvp", &mode,
Py_FileSystemDefaultEncoding,
&path, &argv))
if (!PyArg_ParseTuple(args, "iO&O:spawnvp", &mode,
PyUnicode_FSConverter,
&opath, &argv))
return NULL;
path = bytes2str(opath);
if (PyList_Check(argv)) {
argc = PyList_Size(argv);
getitem = PyList_GetItem;
......@@ -3529,24 +3622,23 @@ posix_spawnvp(PyObject *self, PyObject *args)
else {
PyErr_SetString(PyExc_TypeError,
"spawnvp() arg 2 must be a tuple or list");
PyMem_Free(path);
release_bytes(opath);
return NULL;
}
argvlist = PyMem_NEW(char *, argc+1);
if (argvlist == NULL) {
PyMem_Free(path);
release_bytes(opath);
return PyErr_NoMemory();
}
for (i = 0; i < argc; i++) {
if (!PyArg_Parse((*getitem)(argv, i), "et",
Py_FileSystemDefaultEncoding,
&argvlist[i])) {
if (!fsconvert_strdup((*getitem)(argv, i),
&argvlist[i])) {
free_string_array(argvlist, i);
PyErr_SetString(
PyExc_TypeError,
"spawnvp() arg 2 must contain only strings");
PyMem_Free(path);
release_bytes(opath);
return NULL;
}
}
......@@ -3561,7 +3653,7 @@ posix_spawnvp(PyObject *self, PyObject *args)
Py_END_ALLOW_THREADS
free_string_array(argvlist, argc);
PyMem_Free(path);
release_bytes(opath);
if (spawnval == -1)
return posix_error();
......@@ -3583,6 +3675,7 @@ search path to find the file.\n\
static PyObject *
posix_spawnvpe(PyObject *self, PyObject *args)
{
PyObject *opath
char *path;
PyObject *argv, *env;
char **argvlist;
......@@ -3598,9 +3691,10 @@ posix_spawnvpe(PyObject *self, PyObject *args)
like posix.environ. */
if (!PyArg_ParseTuple(args, "ietOO:spawnvpe", &mode,
Py_FileSystemDefaultEncoding,
&path, &argv, &env))
PyUnicode_FSConverter,
&opath, &argv, &env))
return NULL;
path = bytes2str(opath);
if (PyList_Check(argv)) {
argc = PyList_Size(argv);
getitem = PyList_GetItem;
......@@ -3626,10 +3720,8 @@ posix_spawnvpe(PyObject *self, PyObject *args)
goto fail_0;
}
for (i = 0; i < argc; i++) {
if (!PyArg_Parse((*getitem)(argv, i),
"et;spawnvpe() arg 2 must contain only strings",
Py_FileSystemDefaultEncoding,
&argvlist[i]))
if (!fsconvert_strdup((*getitem)(argv, i),
&argvlist[i]))
{
lastarg = i;
goto fail_1;
......@@ -3710,7 +3802,7 @@ posix_spawnvpe(PyObject *self, PyObject *args)
Py_XDECREF(vals);
Py_XDECREF(keys);
fail_0:
PyMem_Free(path);
release_bytes(opath);
return res;
}
#endif /* PYOS_OS2 */
......@@ -4549,12 +4641,12 @@ static PyObject *
posix_lstat(PyObject *self, PyObject *args)
{
#ifdef HAVE_LSTAT
return posix_do_stat(self, args, "et:lstat", lstat, NULL, NULL);
return posix_do_stat(self, args, "O&:lstat", lstat, NULL, NULL);
#else /* !HAVE_LSTAT */
#ifdef MS_WINDOWS
return posix_do_stat(self, args, "et:lstat", STAT, "U:lstat", win32_wstat);
return posix_do_stat(self, args, "O&:lstat", STAT, "U:lstat", win32_wstat);
#else
return posix_do_stat(self, args, "et:lstat", STAT, NULL, NULL);
return posix_do_stat(self, args, "O&:lstat", STAT, NULL, NULL);
#endif
#endif /* !HAVE_LSTAT */
}
......@@ -4570,16 +4662,18 @@ posix_readlink(PyObject *self, PyObject *args)
{
PyObject* v;
char buf[MAXPATHLEN];
PyObject *opath;
char *path;
int n;
int arg_is_unicode = 0;
if (!PyArg_ParseTuple(args, "et:readlink",
Py_FileSystemDefaultEncoding, &path))
if (!PyArg_ParseTuple(args, "O&:readlink",
PyUnicode_FSConverter, &opath))
return NULL;
path = bytes2str(opath, 1);
v = PySequence_GetItem(args, 0);
if (v == NULL) {
PyMem_Free(path);
release_bytes(opath);
return NULL;
}
......@@ -4592,16 +4686,16 @@ posix_readlink(PyObject *self, PyObject *args)
n = readlink(path, buf, (int) sizeof buf);
Py_END_ALLOW_THREADS
if (n < 0)
return posix_error_with_allocated_filename(path);
return posix_error_with_allocated_filename(opath);
PyMem_Free(path);
release_bytes(opath);
v = PyBytes_FromStringAndSize(buf, n);
if (arg_is_unicode) {
PyObject *w;
w = PyUnicode_FromEncodedObject(v,
Py_FileSystemDefaultEncoding,
"strict");
"utf8b");
if (w != NULL) {
Py_DECREF(v);
v = w;
......@@ -4623,7 +4717,7 @@ Create a symbolic link pointing to src named dst.");
static PyObject *
posix_symlink(PyObject *self, PyObject *args)
{
return posix_2str(args, "etet:symlink", symlink);
return posix_2str(args, "O&O&:symlink", symlink);
}
#endif /* HAVE_SYMLINK */
......@@ -4811,7 +4905,8 @@ Open a file (for low level IO).");
static PyObject *
posix_open(PyObject *self, PyObject *args)
{
char *file = NULL;
PyObject *ofile;
char *file;
int flag;
int mode = 0777;
int fd;
......@@ -4835,17 +4930,17 @@ posix_open(PyObject *self, PyObject *args)
}
#endif
if (!PyArg_ParseTuple(args, "eti|i",
Py_FileSystemDefaultEncoding, &file,
if (!PyArg_ParseTuple(args, "O&i|i",
PyUnicode_FSConverter, &ofile,
&flag, &mode))
return NULL;
file = bytes2str(ofile, 1);
Py_BEGIN_ALLOW_THREADS
fd = open(file, flag, mode);
Py_END_ALLOW_THREADS
if (fd < 0)
return posix_error_with_allocated_filename(file);
PyMem_Free(file);
return posix_error_with_allocated_filename(ofile);
release_bytes(ofile);
return PyLong_FromLong((long)fd);
}
......@@ -5289,20 +5384,27 @@ posix_putenv(PyObject *self, PyObject *args)
wchar_t *s1, *s2;
wchar_t *newenv;
#else
PyObject *os1, *os2;
char *s1, *s2;
char *newenv;
#endif
PyObject *newstr;
size_t len;
if (!PyArg_ParseTuple(args,
#ifdef MS_WINDOWS
if (!PyArg_ParseTuple(args,
"uu:putenv",
#else
"ss:putenv",
#endif
&s1, &s2))
return NULL;
#else
if (!PyArg_ParseTuple(args,
"O&O&:putenv",
PyUnicode_FSConverter, &os1,
PyUnicode_FSConverter, &os2))
return NULL;
s1 = bytes2str(os1, 1);
s2 = bytes2str(os2, 1);
#endif
#if defined(PYOS_OS2)
if (stricmp(s1, "BEGINLIBPATH") == 0) {
......@@ -5345,6 +5447,8 @@ posix_putenv(PyObject *self, PyObject *args)
PyOS_snprintf(newenv, len, "%s=%s", s1, s2);
if (putenv(newenv)) {
Py_DECREF(newstr);
release_bytes(os1);
release_bytes(os2);
posix_error();
return NULL;
}
......@@ -5364,6 +5468,10 @@ posix_putenv(PyObject *self, PyObject *args)
#if defined(PYOS_OS2)
}
#endif
#ifndef MS_WINDOWS
release_bytes(os1);
release_bytes(os2);
#endif
Py_INCREF(Py_None);
return Py_None;
......@@ -6688,6 +6796,7 @@ the underlying Win32 ShellExecute function doesn't work if it is.");
static PyObject *
win32_startfile(PyObject *self, PyObject *args)
{
PyObject *ofilepath;
char *filepath;
char *operation = NULL;
HINSTANCE rc;
......@@ -6729,20 +6838,21 @@ win32_startfile(PyObject *self, PyObject *args)
#endif
normal:
if (!PyArg_ParseTuple(args, "et|s:startfile",
Py_FileSystemDefaultEncoding, &filepath,
if (!PyArg_ParseTuple(args, "O&|s:startfile",
PyUnicode_FSConverter, &ofilepath,
&operation))
return NULL;
filepath = bytes2str(ofilepath, 1);
Py_BEGIN_ALLOW_THREADS
rc = ShellExecute((HWND)0, operation, filepath,
NULL, NULL, SW_SHOWNORMAL);
Py_END_ALLOW_THREADS
if (rc <= (HINSTANCE)32) {
PyObject *errval = win32_error("startfile", filepath);
PyMem_Free(filepath);
release_bytes(ofilepath);
return errval;
}
PyMem_Free(filepath);
release_bytes(ofilepath);
Py_INCREF(Py_None);
return Py_None;
}
......
......@@ -14,6 +14,93 @@ wmain(int argc, wchar_t **argv)
return Py_Main(argc, argv);
}
#else
/* Decode the NUL-terminated, locale-encoded string `arg` into a newly
   allocated wide-character string.

   mbstowcs() is tried first.  If it rejects the input, the string is
   decoded piecewise instead and every byte that cannot be decoded is
   stored as the code point 0xdc00 + byte (the "utf8b" escape), so no
   input byte is lost.

   Returns a buffer obtained from PyMem_Malloc() that the caller owns, or
   NULL after writing a diagnostic to stderr (out of memory, or an
   unexpected mbrtowc() result). */
static wchar_t*
char2wchar(char* arg)
{
    wchar_t *res;
#ifdef HAVE_BROKEN_MBSTOWCS
    /* Some platforms have a broken implementation of
     * mbstowcs which does not count the characters that
     * would result from conversion.  Use an upper bound.
     */
    size_t argsize = strlen(arg);
#else
    size_t argsize = mbstowcs(NULL, arg, 0);
#endif
    size_t count;
    unsigned char *in;
    wchar_t *out;
#ifdef HAVE_MBRTOWC
    mbstate_t mbs;
#endif
    if (argsize != (size_t)-1) {
        res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
        if (!res)
            goto oom;
        count = mbstowcs(res, arg, argsize+1);
        if (count != (size_t)-1)
            return res;
        PyMem_Free(res);
    }
    /* Conversion failed. Fall back to escaping with utf8b. */
#ifdef HAVE_MBRTOWC
    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
    /* Overallocate; as multi-byte characters are in the argument, the
       actual output could use less memory. */
    argsize = strlen(arg) + 1;
    res = PyMem_Malloc(argsize*sizeof(wchar_t));
    if (!res)
        goto oom;
    in = (unsigned char*)arg;
    out = res;
    memset(&mbs, 0, sizeof mbs);
    while (argsize) {
        size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
        if (converted == 0) {
            /* Reached end of string; null char stored. */
            break;
        }
        if (converted == (size_t)-2) {
            /* Incomplete character. This should never happen,
               since we provide everything that we have -
               unless there is a bug in the C library, or I
               misunderstood how mbrtowc works. */
            fprintf(stderr, "unexpected mbrtowc result -2\n");
            /* Do not leak the partially filled buffer. */
            PyMem_Free(res);
            return NULL;
        }
        if (converted == (size_t)-1) {
            /* Conversion error. Escape as UTF-8b, and start over
               in the initial shift state. */
            *out++ = 0xdc00 + *in++;
            argsize--;
            memset(&mbs, 0, sizeof mbs);
            continue;
        }
        /* successfully converted some bytes */
        in += converted;
        argsize -= converted;
        out++;
    }
#else
    /* Cannot use C locale for escaping; manually escape as if charset
       is ASCII (i.e. escape all bytes >= 128).  This will still roundtrip
       correctly in the locale's charset, which must be an ASCII superset. */
    res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
    if (!res)
        goto oom;
    in = (unsigned char*)arg;
    out = res;
    while (*in) {
        if (*in < 128)
            *out++ = *in++;
        else
            *out++ = 0xdc00 + *in++;
    }
    *out = 0;
#endif
    return res;
oom:
    fprintf(stderr, "out of memory\n");
    return NULL;
}
int
main(int argc, char **argv)
{
......@@ -40,31 +127,9 @@ main(int argc, char **argv)
oldloc = strdup(setlocale(LC_ALL, NULL));
setlocale(LC_ALL, "");
for (i = 0; i < argc; i++) {
#ifdef HAVE_BROKEN_MBSTOWCS
/* Some platforms have a broken implementation of
* mbstowcs which does not count the characters that
* would result from conversion. Use an upper bound.
*/
size_t argsize = strlen(argv[i]);
#else
size_t argsize = mbstowcs(NULL, argv[i], 0);
#endif
size_t count;
if (argsize == (size_t)-1) {
fprintf(stderr, "Could not convert argument %d to string\n", i);
argv_copy2[i] = argv_copy[i] = char2wchar(argv[i]);
if (!argv_copy[i])
return 1;
}
argv_copy[i] = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
argv_copy2[i] = argv_copy[i];
if (!argv_copy[i]) {
fprintf(stderr, "out of memory\n");
return 1;
}
count = mbstowcs(argv_copy[i], argv[i], argsize+1);
if (count == (size_t)-1) {
fprintf(stderr, "Could not convert argument %d to string\n", i);
return 1;
}
}
setlocale(LC_ALL, oldloc);
free(oldloc);
......
......@@ -1530,6 +1530,53 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
}
}
/* Converter usable with the "O&" argument format: produce a bytes-like
   object holding `arg` in the file system encoding.

   A bytes or bytearray argument is passed through with a new reference.
   Anything else is converted to a unicode object and encoded with
   Py_FileSystemDefaultEncoding using the "utf8b" error handler.

   On success a reference to the result is stored through `addr` (treated
   as PyObject **) and 1 is returned.  On failure 0 is returned with an
   exception set; a result containing an embedded NUL byte is rejected
   with TypeError. */
int
PyUnicode_FSConverter(PyObject* arg, void* addr)
{
    PyObject *encoded = NULL;
    Py_ssize_t length;
    void *raw;

    if (PyBytes_Check(arg) || PyByteArray_Check(arg)) {
        /* Already bytes-like: hand it back under a fresh reference. */
        Py_INCREF(arg);
        encoded = arg;
    }
    else {
        PyObject *text = PyUnicode_FromObject(arg);
        if (text == NULL)
            return 0;
        encoded = PyUnicode_AsEncodedObject(text,
                                            Py_FileSystemDefaultEncoding,
                                            "utf8b");
        Py_DECREF(text);
        if (encoded == NULL)
            return 0;
        if (!PyBytes_Check(encoded)) {
            Py_DECREF(encoded);
            PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes");
            return 0;
        }
    }

    /* Fetch buffer and length with the accessor matching the type. */
    if (PyBytes_Check(encoded)) {
        length = PyBytes_GET_SIZE(encoded);
        raw = PyBytes_AS_STRING(encoded);
    }
    else {
        length = PyByteArray_GET_SIZE(encoded);
        raw = PyByteArray_AS_STRING(encoded);
    }

    /* strlen() stops at the first NUL, so a mismatch with the stored
       length means there is a NUL inside the data. */
    if (length != strlen(raw)) {
        PyErr_SetString(PyExc_TypeError, "embedded NUL character");
        Py_DECREF(encoded);
        return 0;
    }

    *(PyObject**)addr = encoded;
    return 1;
}
char*
_PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
{
......@@ -4154,11 +4201,22 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
collstart-startp, collend-startp, &newpos);
if (repunicode == NULL)
goto onError;
if (!PyUnicode_Check(repunicode)) {
/* Implementation limitation: byte results not supported yet. */
PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
if (PyBytes_Check(repunicode)) {
/* Directly copy bytes result to output. */
repsize = PyBytes_Size(repunicode);
if (repsize > 1) {
/* Make room for all additional bytes. */
if (_PyBytes_Resize(&res, ressize+repsize-1)) {
Py_DECREF(repunicode);
goto onError;
}
ressize += repsize-1;
}
memcpy(str, PyBytes_AsString(repunicode), repsize);
str += repsize;
p = startp + newpos;
Py_DECREF(repunicode);
goto onError;
break;
}
/* need more space? (at least enough for what we
have+the replacement+the rest of the string, so
......@@ -5123,11 +5181,24 @@ int charmap_encoding_error(
collstartpos, collendpos, &newpos);
if (repunicode == NULL)
return -1;
if (!PyUnicode_Check(repunicode)) {
/* Implementation limitation: byte results not supported yet. */
PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
if (PyBytes_Check(repunicode)) {
/* Directly copy bytes result to output. */
Py_ssize_t outsize = PyBytes_Size(*res);
Py_ssize_t requiredsize;
repsize = PyBytes_Size(repunicode);
requiredsize = *respos + repsize;
if (requiredsize > outsize)
/* Make room for all additional bytes. */
if (charmapencode_resize(res, respos, requiredsize)) {
Py_DECREF(repunicode);
return -1;
}
memcpy(PyBytes_AsString(*res) + *respos,
PyBytes_AsString(repunicode), repsize);
*respos += repsize;
*inpos = newpos;
Py_DECREF(repunicode);
return -1;
break;
}
/* generate replacement */
repsize = PyUnicode_GET_SIZE(repunicode);
......@@ -5691,7 +5762,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
if (repunicode == NULL)
goto onError;
if (!PyUnicode_Check(repunicode)) {
/* Implementation limitation: byte results not supported yet. */
/* Byte results not supported, since they have no decimal property. */
PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
Py_DECREF(repunicode);
goto onError;
......
......@@ -829,6 +829,82 @@ PyCodec_SurrogateErrors(PyObject *exc)
}
}
/* Implementation of the "utf8b" error handler.

   Encoding: every character in the failing range [start, end) must lie
   in U+DC80..U+DCFF; each becomes the single byte (ch - 0xdc00) and the
   result is the tuple (bytes, end).  Any other character re-raises the
   original exception.

   Decoding: up to four bytes >= 128, beginning at `start`, are each
   turned into the code point 0xdc00 + byte and the result is the tuple
   (unicode, start + number_of_bytes_escaped).  If the first byte is
   below 128 the original exception is re-raised.

   Any other exception type is reported through wrong_exception_type().
   Returns NULL with an exception set on failure. */
static PyObject *
PyCodec_UTF8bErrors(PyObject *exc)
{
    PyObject *object;
    Py_ssize_t start;
    Py_ssize_t end;

    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        Py_UNICODE *chars;
        PyObject *raw;
        PyObject *result;
        char *dst;
        Py_ssize_t pos;

        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        object = PyUnicodeEncodeError_GetObject(exc);
        if (object == NULL)
            return NULL;
        chars = PyUnicode_AS_UNICODE(object);
        raw = PyBytes_FromStringAndSize(NULL, end-start);
        if (raw == NULL) {
            Py_DECREF(object);
            return NULL;
        }
        dst = PyBytes_AsString(raw);
        for (pos = start; pos < end; pos++) {
            Py_UNICODE ch = chars[pos];
            if (ch < 0xdc80 || ch > 0xdcff) {
                /* Not a UTF-8b surrogate, fail with original exception */
                PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
                Py_DECREF(raw);
                Py_DECREF(object);
                return NULL;
            }
            *dst++ = ch - 0xdc00;
        }
        result = Py_BuildValue("(On)", raw, end);
        Py_DECREF(raw);
        Py_DECREF(object);
        return result;
    }

    if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
        unsigned char *bytes;
        Py_UNICODE escaped[4]; /* decode up to 4 bad bytes. */
        int count;

        if (PyUnicodeDecodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        object = PyUnicodeDecodeError_GetObject(exc);
        if (object == NULL)
            return NULL;
        bytes = (unsigned char*)PyBytes_AsString(object);
        if (bytes == NULL) {
            Py_DECREF(object);
            return NULL;
        }
        for (count = 0; count < 4 && count < end-start; count++) {
            /* Refuse to escape ASCII bytes. */
            if (bytes[start+count] < 128)
                break;
            escaped[count] = 0xdc00 + bytes[start+count];
        }
        Py_DECREF(object);
        if (count == 0) {
            /* codec complained about ASCII byte. */
            PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
            return NULL;
        }
        return Py_BuildValue("(u#n)", escaped, count, start+count);
    }

    /* Neither an encode nor a decode error. */
    wrong_exception_type(exc);
    return NULL;
}
static PyObject *strict_errors(PyObject *self, PyObject *exc)
{
......@@ -864,6 +940,11 @@ static PyObject *surrogates_errors(PyObject *self, PyObject *exc)
return PyCodec_SurrogateErrors(exc);
}
/* Callable wrapper around PyCodec_UTF8bErrors(); `self` is not used and
   `exc` is forwarded unchanged. */
static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
{
    PyObject *replacement = PyCodec_UTF8bErrors(exc);
    return replacement;
}
static int _PyCodecRegistry_Init(void)
{
static struct {
......@@ -918,6 +999,14 @@ static int _PyCodecRegistry_Init(void)
surrogates_errors,
METH_O
}
},
{
"utf8b",
{
"utf8b",
utf8b_errors,
METH_O
}
}
};
......
......@@ -262,6 +262,22 @@ Py_InitializeEx(int install_sigs)
_PyImportHooks_Init();
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
/* On Unix, set the file system encoding according to the
user's preference, if the CODESET names a well-known
Python codec, and Py_FileSystemDefaultEncoding isn't
initialized by other means. Also set the encoding of
stdin and stdout if these are terminals. */
codeset = get_codeset();
if (codeset) {
if (!Py_FileSystemDefaultEncoding)
Py_FileSystemDefaultEncoding = codeset;
else
free(codeset);
}
#endif
if (install_sigs)
initsigs(); /* Signal handling stuff, including initintr() */
......@@ -285,22 +301,6 @@ Py_InitializeEx(int install_sigs)
#ifdef WITH_THREAD
_PyGILState_Init(interp, tstate);
#endif /* WITH_THREAD */
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
/* On Unix, set the file system encoding according to the
user's preference, if the CODESET names a well-known
Python codec, and Py_FileSystemDefaultEncoding isn't
initialized by other means. Also set the encoding of
stdin and stdout if these are terminals. */
codeset = get_codeset();
if (codeset) {
if (!Py_FileSystemDefaultEncoding)
Py_FileSystemDefaultEncoding = codeset;
else
free(codeset);
}
#endif
}
void
......
#! /bin/sh
# From configure.in Revision: 71731 .
# From configure.in Revision: 72144 .
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.61 for python 3.1.
#
......@@ -16297,13 +16297,14 @@ echo "${ECHO_T}MACHDEP_OBJS" >&6; }
for ac_func in alarm setitimer getitimer bind_textdomain_codeset chown \
clock confstr ctermid execv fchmod fchown fork fpathconf ftime ftruncate \
gai_strerror getgroups getlogin getloadavg getpeername getpgid getpid \
getpriority getpwent getspnam getspent getsid getwd \
kill killpg lchmod lchown lstat mkfifo mknod mktime \
kill killpg lchmod lchown lstat mbrtowc mkfifo mknod mktime \
mremap nice pathconf pause plock poll pthread_init \
putenv readlink realpath \
select sem_open sem_timedwait sem_getvalue sem_unlink setegid seteuid \
......
......@@ -2403,7 +2403,7 @@ AC_CHECK_FUNCS(alarm setitimer getitimer bind_textdomain_codeset chown \
clock confstr ctermid execv fchmod fchown fork fpathconf ftime ftruncate \
gai_strerror getgroups getlogin getloadavg getpeername getpgid getpid \
getpriority getpwent getspnam getspent getsid getwd \
kill killpg lchmod lchown lstat mkfifo mknod mktime \
kill killpg lchmod lchown lstat mbrtowc mkfifo mknod mktime \
mremap nice pathconf pause plock poll pthread_init \
putenv readlink realpath \
select sem_open sem_timedwait sem_getvalue sem_unlink setegid seteuid \
......
......@@ -419,6 +419,9 @@
/* Define this if you have the makedev macro. */
#undef HAVE_MAKEDEV
/* Define to 1 if you have the `mbrtowc' function. */
#undef HAVE_MBRTOWC
/* Define to 1 if you have the `memmove' function. */
#undef HAVE_MEMMOVE
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment