Issue #5915: Implement PEP 383, Non-decodable Bytes in System Character Interfaces.

Issue #5915: Implement PEP 383, Non-decodable Bytes in
System Character Interfaces.
011e8420 · Martin v. Löwis · 93f65a17 · 011e8420 · 011e8420 · 011e8420
Commit 011e8420 authored May 05, 2009 by Martin v. Löwis
15 changed files
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -322,6 +322,8 @@ and implemented by all standard Python codecs:
 | ``'backslashreplace'``  | Replace with backslashed escape sequences     |
 |                         | (only for encoding).                          |
 +-------------------------+-----------------------------------------------+
+| ``'utf8b'``             | Replace byte with surrogate U+DCxx.           |
++-------------------------+-----------------------------------------------+

 In addition, the following error handlers are specific to a single codec:

@@ -333,7 +335,7 @@ In addition, the following error handlers are specific to a single codec:
 +------------------+---------+--------------------------------------------+

 .. versionadded:: 3.1
-   The ``'surrogates'`` error handler.
+   The ``'utf8b'`` and ``'surrogates'`` error handlers.

 The set of allowed values can be extended via :meth:`register_error`.


--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -51,6 +51,30 @@ the :mod:`os` module, but using them is of course a threat to portability!
   ``'ce'``, ``'java'``.


+.. _os-filenames:
+
+File Names, Command Line Arguments, and Environment Variables
+-------------------------------------------------------------
+
+In Python, file names, command line arguments, and environment
+variables are represented using the string type. On some systems,
+converting these strings to and from bytes is necessary before passing
+them to the operating system. Python uses the file system encoding to
+perform this conversion (see :func:`sys.getfilesystemencoding`).
+
+.. versionchanged:: 3.1
+   On some systems, conversion using the file system encoding may
+   fail. In this case, Python uses the ``utf8b`` encoding error
+   handler, which means that undecodable bytes are replaced by a
+   Unicode character U+DCxx on decoding, and these are again
+   translated to the original byte on encoding.
+
+
+The file system encoding must guarantee to successfully decode all
+bytes below 128. If the file system encoding fails to provide this
+guarantee, API functions may raise UnicodeErrors.
+
+
 .. _os-procinfo:

 Process Parameters
@@ -688,12 +712,8 @@ Files and Directories

 .. function:: getcwd()

-   Return a string representing the current working directory.  On Unix
-   platforms, this function may raise :exc:`UnicodeDecodeError` if the name of
-   the current directory is not decodable in the file system encoding.  Use
-   :func:`getcwdb` if you need the call to never fail. Availability: Unix,
-   Windows.
-
+   Return a string representing the current working directory.
+   Availability: Unix, Windows.

 .. function:: getcwdb()

@@ -800,10 +820,8 @@ Files and Directories
   entries ``'.'`` and ``'..'`` even if they are present in the directory.
   Availability: Unix, Windows.

-   This function can be called with a bytes or string argument.  In the bytes
-   case, all filenames will be listed as returned by the underlying API.  In the
-   string case, filenames will be decoded using the file system encoding, and
-   skipped if a decoding error occurs.
+   This function can be called with a bytes or string argument, and returns
+   filenames of the same datatype.


 .. function:: lstat(path)

--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -198,6 +198,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
 # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
 # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
+# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
 # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
 # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
@@ -296,6 +297,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
 # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
 # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
+# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
 # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
 # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
@@ -693,25 +695,6 @@ PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
    PyObject *unicode,
    const char *errors);

-/* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
-
-   If the encoding is supported by one of the built-in codecs (i.e., UTF-8,
-   UTF-16, UTF-32, Latin-1 or MBCS), otherwise fallback to UTF-8 and replace
-   invalid characters with '?'.
-
-   The function is intended to be used for paths and file names only
-   during bootstrapping process where the codecs are not set up.
-*/
-
-PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
-    const char *s               /* encoded string */
-    );
-
-PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
-    const char *s,               /* encoded string */
-    Py_ssize_t size              /* size */
-    );
-
 /* Returns a pointer to the default encoding (normally, UTF-8) of the
   Unicode object unicode and the size of the encoded representation
   in bytes stored in *size.
@@ -1252,6 +1235,33 @@ PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
    const char *errors		/* error handling */
    );

+/* --- File system encoding ---------------------------------------------- */
+
+/* ParseTuple converter which converts a Unicode object into the file
+   system encoding, using the PEP 383 error handler; bytes objects are
+   output as-is. */
+
+PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
+
+/* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
+
+   If the encoding is supported by one of the built-in codecs (i.e., UTF-8,
+   UTF-16, UTF-32, Latin-1 or MBCS), that codec is used; otherwise fall back
+   to UTF-8 and replace invalid characters with '?'.
+
+   The function is intended to be used for paths and file names only
+   during bootstrapping process where the codecs are not set up.
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
+    const char *s               /* encoded string */
+    );
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
+    const char *s,               /* encoded string */
+    Py_ssize_t size              /* size */
+    );
+
 /* --- Methods & Slots ----------------------------------------------------

   These are capable of handling Unicode objects and strings on input

--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1516,6 +1516,34 @@ class TypesTest(unittest.TestCase):
        self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
        self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))

+class Utf8bTest(unittest.TestCase):
+
+    def test_utf8(self):
+        # Undecodable byte 0x80 <-> lone surrogate U+DC80 (round trip)
+        self.assertEqual(b"foo\x80bar".decode("utf-8", "utf8b"),
+                         "foo\udc80bar")
+        self.assertEqual("foo\udc80bar".encode("utf-8", "utf8b"),
+                         b"foo\x80bar")
+        # The UTF-8 form of U+DC00 is invalid, so each byte is escaped alone
+        self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "utf8b"),
+                         "\udced\udcb0\udc80")
+        self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "utf8b"),
+                         b"\xed\xb0\x80")
+
+    def test_ascii(self):
+        # Non-ASCII byte 0x80 <-> U+DC80 with the ascii codec
+        self.assertEqual(b"foo\x80bar".decode("ascii", "utf8b"),
+                         "foo\udc80bar")
+        self.assertEqual("foo\udc80bar".encode("ascii", "utf8b"),
+                         b"foo\x80bar")
+
+    def test_charmap(self):
+        # \xa5 is unmapped in iso-8859-3, so it round-trips via U+DCA5
+        self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "utf8b"),
+                         "foo\udca5bar")
+        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "utf8b"),
+                         b"foo\xa5bar")
+

 def test_main():
    support.run_unittest(
@@ -1543,6 +1571,7 @@ def test_main():
        CharmapTest,
        WithStmtTest,
        TypesTest,
+        Utf8bTest,
    )



--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -7,6 +7,7 @@ import errno
 import unittest
 import warnings
 import sys
+import shutil
 from test import support

 # Tests creating TESTFN
@@ -698,9 +699,44 @@ if sys.platform != 'win32':
                    self.assertRaises(os.error, os.setregid, 0, 0)
                self.assertRaises(OverflowError, os.setregid, 1<<32, 0)
                self.assertRaises(OverflowError, os.setregid, 0, 1<<32)
+
+    class Pep383Tests(unittest.TestCase):
+        filenames = [b'foo\xf6bar', 'foo\xf6bar'.encode("utf-8")]  # raw 0xF6 is invalid UTF-8; the second name is valid UTF-8
+
+        def setUp(self):
+            self.fsencoding = sys.getfilesystemencoding()  # saved so tearDown can restore it
+            sys.setfilesystemencoding("utf-8")
+            self.dir = support.TESTFN
+            self.bdir = self.dir.encode("utf-8", "utf8b")
+            os.mkdir(self.dir)
+            self.unicodefn = []
+            for fn in self.filenames:
+                f = open(os.path.join(self.bdir, fn), "w")  # created through the bytes path
+                f.close()
+                self.unicodefn.append(fn.decode("utf-8", "utf8b"))  # str form used by the tests below
+
+        def tearDown(self):
+            shutil.rmtree(self.dir)
+            sys.setfilesystemencoding(self.fsencoding)
+
+        def test_listdir(self):
+            expected = set(self.unicodefn)
+            found = set(os.listdir(support.TESTFN))  # str argument, so str names are compared
+            self.assertEquals(found, expected)
+
+        def test_open(self):
+            for fn in self.unicodefn:
+                f = open(os.path.join(self.dir, fn))
+                f.close()
+
+        def test_stat(self):
+            for fn in self.unicodefn:
+                os.stat(os.path.join(self.dir, fn))
 else:
    class PosixUidGidTests(unittest.TestCase):
        pass
+    class Pep383Tests(unittest.TestCase):
+        pass

 def test_main():
    support.run_unittest(
@@ -714,7 +750,8 @@ def test_main():
        ExecTests,
        Win32ErrorTests,
        TestInvalidFD,
-        PosixUidGidTests
+        PosixUidGidTests,
+        Pep383Tests
    )

 if __name__ == "__main__":

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,8 @@ What's New in Python 3.1 beta 1?
 Core and Builtins
 -----------------

+- Implement PEP 383, Non-decodable Bytes in System Character Interfaces.
+
 - Issue #5890: in subclasses of 'property' the __doc__ attribute was
  shadowed by classtype's, even if it was None.  property now
  inserts the __doc__ into the subclass instance __dict__.

--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -245,7 +245,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
 				return -1;

 			stringobj = PyUnicode_AsEncodedString(
-				u, Py_FileSystemDefaultEncoding, NULL);
+				u, Py_FileSystemDefaultEncoding, "utf8b");
 			Py_DECREF(u);
 			if (stringobj == NULL)
 				return -1;

--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -493,12 +493,14 @@ convertenviron(void)
 		char *p = strchr(*e, '=');
 		if (p == NULL)
 			continue;
-		k = PyUnicode_FromStringAndSize(*e, (int)(p-*e));
+		k = PyUnicode_Decode(*e, (int)(p-*e),
+				     Py_FileSystemDefaultEncoding, "utf8b");
 		if (k == NULL) {
 			PyErr_Clear();
 			continue;
 		}
-		v = PyUnicode_FromString(p+1);
+		v = PyUnicode_Decode(p+1, strlen(p+1),
+				     Py_FileSystemDefaultEncoding, "utf8b");
 		if (v == NULL) {
 			PyErr_Clear();
 			Py_DECREF(k);
@@ -534,6 +536,37 @@ convertenviron(void)
 	return d;
 }

+/* Return the char* data of a bytes or bytearray object. If lock is nonzero
+   and o is a bytearray, PyObject_GetBuffer is called to lock its buffer. */
+
+static char*
+bytes2str(PyObject* o, int lock)
+{
+	if(PyBytes_Check(o))
+		return PyBytes_AsString(o);
+	else if(PyByteArray_Check(o)) {
+		if (lock && PyObject_GetBuffer(o, NULL, 0) < 0)
+			/* Should not fail on a bytearray; the pointer is returned regardless. */
+			PyErr_BadInternalCall();
+		return PyByteArray_AsString(o);
+	} else {
+		/* The FS converter should have verified that this
+		   is either bytes or bytearray. */
+		Py_FatalError("bad object passed to bytes2str");
+		/* Not reached: Py_FatalError aborts. */
+		return "";
+	}
+}
+
+/* Release the bytearray lock (passing o as the exporter), then decref o. */
+static void
+release_bytes(PyObject* o)
+{
+	if (PyByteArray_Check(o))
+		o->ob_type->tp_as_buffer->bf_releasebuffer(o, 0);
+	Py_DECREF(o);
+}
+

 /* Set a POSIX-specific error from errno, and return NULL */

@@ -558,10 +591,11 @@ posix_error_with_unicode_filename(Py_UNICODE* name)


 static PyObject *
-posix_error_with_allocated_filename(char* name)
+posix_error_with_allocated_filename(PyObject* name)
 {
-	PyObject *rc = PyErr_SetFromErrnoWithFilename(PyExc_OSError, name);
-	PyMem_Free(name);
+	PyObject *rc = PyErr_SetFromErrnoWithFilename(PyExc_OSError, 
+						      bytes2str(name, 0));
+	release_bytes(name);
 	return rc;
 }

@@ -728,17 +762,19 @@ unicode_file_names(void)
 static PyObject *
 posix_1str(PyObject *args, char *format, int (*func)(const char*))
 {
-	char *path1 = NULL;
+	PyObject *opath1 = NULL;
+	char *path1;
 	int res;
 	if (!PyArg_ParseTuple(args, format,
-	                      Py_FileSystemDefaultEncoding, &path1))
+	                      PyUnicode_FSConverter, &opath1))
 		return NULL;
+	path1 = bytes2str(opath1, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = (*func)(path1);
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path1);
-	PyMem_Free(path1);
+		return posix_error_with_allocated_filename(opath1);
+	release_bytes(opath1);
 	Py_INCREF(Py_None);
 	return Py_None;
 }
@@ -748,17 +784,20 @@ posix_2str(PyObject *args,
 	   char *format,
 	   int (*func)(const char *, const char *))
 {
-	char *path1 = NULL, *path2 = NULL;
+	PyObject *opath1, *opath2;
+	char *path1, *path2;
 	int res;
 	if (!PyArg_ParseTuple(args, format,
-	                      Py_FileSystemDefaultEncoding, &path1,
-	                      Py_FileSystemDefaultEncoding, &path2))
+	                      PyUnicode_FSConverter, &opath1,
+	                      PyUnicode_FSConverter, &opath2))
 		return NULL;
+	path1 = bytes2str(opath1, 1);
+	path2 = bytes2str(opath2, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = (*func)(path1, path2);
 	Py_END_ALLOW_THREADS
-	PyMem_Free(path1);
-	PyMem_Free(path2);
+	release_bytes(opath1);
+	release_bytes(opath2);
 	if (res != 0)
 		/* XXX how to report both path1 and path2??? */
 		return posix_error();
@@ -1560,8 +1599,8 @@ posix_do_stat(PyObject *self, PyObject *args,
 	      int (*wstatfunc)(const Py_UNICODE *, STRUCT_STAT *))
 {
 	STRUCT_STAT st;
-	char *path = NULL;	/* pass this to stat; do not free() it */
-	char *pathfree = NULL;  /* this memory must be free'd */
+	PyObject *opath;
+	char *path;
 	int res;
 	PyObject *result;

@@ -1590,25 +1629,24 @@ posix_do_stat(PyObject *self, PyObject *args,
 #endif

 	if (!PyArg_ParseTuple(args, format,
-	                      Py_FileSystemDefaultEncoding, &path))
+	                      PyUnicode_FSConverter, &opath))
 		return NULL;
-	pathfree = path;
-
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = (*statfunc)(path, &st);
 	Py_END_ALLOW_THREADS

 	if (res != 0) {
 #ifdef MS_WINDOWS
-		result = win32_error("stat", pathfree);
+		result = win32_error("stat", path);
 #else
-		result = posix_error_with_filename(pathfree);
+		result = posix_error_with_filename(path);
 #endif
 	} 
 	else
 		result = _pystat_fromstructstat(&st);

-	PyMem_Free(pathfree);
+	release_bytes(opath);
 	return result;
 }

@@ -1625,6 +1663,7 @@ existence, or the inclusive-OR of R_OK, W_OK, and X_OK.");
 static PyObject *
 posix_access(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	int mode;
 	
@@ -1644,13 +1683,14 @@ posix_access(PyObject *self, PyObject *args)
 		   are also valid. */
 		PyErr_Clear();
 	}
-	if (!PyArg_ParseTuple(args, "eti:access",
-			      Py_FileSystemDefaultEncoding, &path, &mode))
+	if (!PyArg_ParseTuple(args, "O&i:access",
+			      PyUnicode_FSConverter, &opath, &mode))
 		return 0;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	attr = GetFileAttributesA(path);
 	Py_END_ALLOW_THREADS
-	PyMem_Free(path);
+	release_bytes(opath);
 finish:
 	if (attr == 0xFFFFFFFF)
 		/* File does not exist, or cannot read attributes */
@@ -1663,13 +1703,14 @@ finish:
 			       || (attr & FILE_ATTRIBUTE_DIRECTORY));
 #else
 	int res;
-	if (!PyArg_ParseTuple(args, "eti:access", 
-			      Py_FileSystemDefaultEncoding, &path, &mode))
+	if (!PyArg_ParseTuple(args, "O&i:access", 
+			      PyUnicode_FSConverter, &opath, &mode))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = access(path, mode);
 	Py_END_ALLOW_THREADS
-	PyMem_Free(path);
+	release_bytes(opath);
 	return PyBool_FromLong(res == 0);
 #endif
 }
@@ -1750,11 +1791,11 @@ posix_chdir(PyObject *self, PyObject *args)
 #ifdef MS_WINDOWS
 	return win32_1str(args, "chdir", "y:chdir", win32_chdir, "U:chdir", win32_wchdir);
 #elif defined(PYOS_OS2) && defined(PYCC_GCC)
-	return posix_1str(args, "et:chdir", _chdir2);
+	return posix_1str(args, "O&:chdir", _chdir2);
 #elif defined(__VMS)
-	return posix_1str(args, "et:chdir", (int (*)(const char *))chdir);
+	return posix_1str(args, "O&:chdir", (int (*)(const char *))chdir);
 #else
-	return posix_1str(args, "et:chdir", chdir);
+	return posix_1str(args, "O&:chdir", chdir);
 #endif
 }

@@ -1779,6 +1820,7 @@ Change the access permissions of a file.");
 static PyObject *
 posix_chmod(PyObject *self, PyObject *args)
 {
+	PyObject *opath = NULL;
 	char *path = NULL;
 	int i;
 	int res;
@@ -1809,9 +1851,10 @@ posix_chmod(PyObject *self, PyObject *args)
 		   are also valid. */
 		PyErr_Clear();
 	}
-	if (!PyArg_ParseTuple(args, "eti:chmod", Py_FileSystemDefaultEncoding,
-	                      &path, &i))
+	if (!PyArg_ParseTuple(args, "O&i:chmod", PyUnicode_FSConverter,
+	                      &opath, &i))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	attr = GetFileAttributesA(path);
 	if (attr != 0xFFFFFFFF) {
@@ -1826,22 +1869,23 @@ posix_chmod(PyObject *self, PyObject *args)
 	Py_END_ALLOW_THREADS
 	if (!res) {
 		win32_error("chmod", path);
-		PyMem_Free(path);
+		release_bytes(opath);
 		return NULL;
 	}
-	PyMem_Free(path);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 #else /* Py_WIN_WIDE_FILENAMES */
-	if (!PyArg_ParseTuple(args, "eti:chmod", Py_FileSystemDefaultEncoding,
-	                      &path, &i))
+	if (!PyArg_ParseTuple(args, "O&i:chmod", PyUnicode_FSConverter,
+	                      &opath, &i))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = chmod(path, i);
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path);
-	PyMem_Free(path);
+		return posix_error_with_allocated_filename(opath);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 #endif
@@ -1877,18 +1921,20 @@ affects the link itself rather than the target.");
 static PyObject *
 posix_lchmod(PyObject *self, PyObject *args)
 {
-	char *path = NULL;
+	PyObject *opath;
+	char *path;
 	int i;
 	int res;
-	if (!PyArg_ParseTuple(args, "eti:lchmod", Py_FileSystemDefaultEncoding,
-	                      &path, &i))
+	if (!PyArg_ParseTuple(args, "O&i:lchmod", PyUnicode_FSConverter,
+	                      &opath, &i))
 		return NULL;
+	path = bytes2str(opath, 1);	/* points into opath; valid until opath is released */
 	Py_BEGIN_ALLOW_THREADS
 	res = lchmod(path, i);
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path);
-	PyMem_Free(path);
+		return posix_error_with_allocated_filename(opath);
+	release_bytes(opath);
 	Py_RETURN_NONE;
 }
 #endif /* HAVE_LCHMOD */
@@ -1902,18 +1948,20 @@ Set file flags.");
 static PyObject *
 posix_chflags(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	unsigned long flags;
 	int res;
-	if (!PyArg_ParseTuple(args, "etk:chflags",
-			      Py_FileSystemDefaultEncoding, &path, &flags))
+	if (!PyArg_ParseTuple(args, "O&k:chflags",
+			      PyUnicode_FSConverter, &opath, &flags))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = chflags(path, flags);
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path);
-	PyMem_Free(path);
+		return posix_error_with_allocated_filename(opath);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 }
@@ -1928,18 +1976,20 @@ This function will not follow symbolic links.");
 static PyObject *
 posix_lchflags(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	unsigned long flags;
 	int res;
-	if (!PyArg_ParseTuple(args, "etk:lchflags",
-			      Py_FileSystemDefaultEncoding, &path, &flags))
+	if (!PyArg_ParseTuple(args, "O&k:lchflags",
+			      PyUnicode_FSConverter, &opath, &flags))
 		return NULL;
+	path = bytes2str(opath, 1);	/* points into opath; valid until opath is released */
 	Py_BEGIN_ALLOW_THREADS
 	res = lchflags(path, flags);
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path);
-	PyMem_Free(path);
+		return posix_error_with_allocated_filename(opath);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 }
@@ -1953,7 +2003,7 @@ Change root directory to path.");
 static PyObject *
 posix_chroot(PyObject *self, PyObject *args)
 {
-	return posix_1str(args, "et:chroot", chroot);
+	return posix_1str(args, "O&:chroot", chroot);
 }
 #endif

@@ -1996,19 +2046,21 @@ Change the owner and group id of path to the numeric uid and gid.");
 static PyObject *
 posix_chown(PyObject *self, PyObject *args)
 {
-	char *path = NULL;
+	PyObject *opath;
+	char *path;
 	long uid, gid;
 	int res;
-	if (!PyArg_ParseTuple(args, "etll:chown",
-	                      Py_FileSystemDefaultEncoding, &path,
+	if (!PyArg_ParseTuple(args, "O&ll:chown",
+	                      PyUnicode_FSConverter, &opath,
 	                      &uid, &gid))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = chown(path, (uid_t) uid, (gid_t) gid);
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path);
-	PyMem_Free(path);
+		return posix_error_with_allocated_filename(opath);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 }
@@ -2045,19 +2097,21 @@ This function will not follow symbolic links.");
 static PyObject *
 posix_lchown(PyObject *self, PyObject *args)
 {
-	char *path = NULL;
+	PyObject *opath;
+	char *path;
 	int uid, gid;
 	int res;
-	if (!PyArg_ParseTuple(args, "etii:lchown",
-	                      Py_FileSystemDefaultEncoding, &path,
+	if (!PyArg_ParseTuple(args, "O&ii:lchown",
+	                      PyUnicode_FSConverter, &opath,
 	                      &uid, &gid))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = lchown(path, (uid_t) uid, (gid_t) gid);
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path);
-	PyMem_Free(path);
+		return posix_error_with_allocated_filename(opath);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 }
@@ -2113,7 +2167,7 @@ posix_getcwd(int use_bytes)
 		return posix_error();
 	if (use_bytes)
 		return PyBytes_FromStringAndSize(buf, strlen(buf));
-	return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"strict");
+	return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"utf8b");
 }

 PyDoc_STRVAR(posix_getcwd__doc__,
@@ -2146,7 +2200,7 @@ Create a hard link to a file.");
 static PyObject *
 posix_link(PyObject *self, PyObject *args)
 {
-	return posix_2str(args, "etet:link", link);
+	return posix_2str(args, "O&O&:link", link);
 }
 #endif /* HAVE_LINK */

@@ -2171,6 +2225,7 @@ posix_listdir(PyObject *self, PyObject *args)
 	HANDLE hFindFile;
 	BOOL result;
 	WIN32_FIND_DATA FileData;
+	PyObject *opath;
 	char namebuf[MAX_PATH+5]; /* Overallocate for \\*.*\0 */
 	char *bufptr = namebuf;
 	Py_ssize_t len = sizeof(namebuf)-5; /* only claim to have space for MAX_PATH */
@@ -2260,9 +2315,17 @@ posix_listdir(PyObject *self, PyObject *args)
 	}
 #endif

-	if (!PyArg_ParseTuple(args, "et#:listdir",
-	                      Py_FileSystemDefaultEncoding, &bufptr, &len))
+	if (!PyArg_ParseTuple(args, "O&:listdir",
+	                      PyUnicode_FSConverter, &opath))
+		return NULL;
+	if (PyObject_Size(opath)+1 > MAX_PATH) {
+		PyErr_SetString(PyExc_ValueError, "path too long");
+		Py_DECREF(opath);
 		return NULL;
+	}
+	strcpy(namebuf, bytes2str(opath, 0));
+	len = PyObject_Size(opath);
+	Py_DECREF(opath);	/* path now lives in namebuf; drop the converter's reference */
 	if (len > 0) {
 		char ch = namebuf[len-1];
 		if (ch != SEP && ch != ALTSEP && ch != ':')
@@ -2324,6 +2386,7 @@ posix_listdir(PyObject *self, PyObject *args)
 #ifndef MAX_PATH
 #define MAX_PATH    CCHMAXPATH
 #endif
+    PyObject *oname;
    char *name, *pt;
    Py_ssize_t len;
    PyObject *d, *v;
@@ -2333,11 +2396,13 @@ posix_listdir(PyObject *self, PyObject *args)
    FILEFINDBUF3   ep;
    APIRET rc;

-    if (!PyArg_ParseTuple(args, "et#:listdir", 
-                          Py_FileSystemDefaultEncoding, &name, &len))
+    if (!PyArg_ParseTuple(args, "O&:listdir", 
+                          PyUnicode_FSConverter, &oname))
        return NULL;
+    name = bytes2str(oname, 1);  /* lock: paired with the release_bytes(oname) calls on the exit paths */
+    len = PyObject_Size(oname);
    if (len >= MAX_PATH) {
-        PyMem_Free(name);
+        release_bytes(oname);
        PyErr_SetString(PyExc_ValueError, "path too long");
        return NULL;
    }
@@ -2350,7 +2415,7 @@ posix_listdir(PyObject *self, PyObject *args)
    strcpy(namebuf + len, "*.*");

    if ((d = PyList_New(0)) == NULL) {
-        PyMem_Free(name);
+        release_bytes(oname);
        return NULL;
    }

@@ -2363,7 +2428,7 @@ posix_listdir(PyObject *self, PyObject *args)

    if (rc != NO_ERROR) {
        errno = ENOENT;
-        return posix_error_with_allocated_filename(name);
+        return posix_error_with_allocated_filename(oname);
    }

    if (srchcnt > 0) { /* If Directory is NOT Totally Empty, */
@@ -2393,11 +2458,11 @@ posix_listdir(PyObject *self, PyObject *args)
        } while (DosFindNext(hdir, &ep, sizeof(ep), &srchcnt) == NO_ERROR && srchcnt > 0);
    }

-    PyMem_Free(name);
+    release_bytes(oname);
    return d;
 #else
-
-	char *name = NULL;
+	PyObject *oname;
+	char *name;
 	PyObject *d, *v;
 	DIR *dirp;
 	struct dirent *ep;
@@ -2408,14 +2473,15 @@ posix_listdir(PyObject *self, PyObject *args)
 		arg_is_unicode = 0;
 		PyErr_Clear();
 	}
-	if (!PyArg_ParseTuple(args, "et:listdir", Py_FileSystemDefaultEncoding, &name))
+	if (!PyArg_ParseTuple(args, "O&:listdir", PyUnicode_FSConverter, &oname))
 		return NULL;
+	name = bytes2str(oname, 1);
 	if ((dirp = opendir(name)) == NULL) {
-		return posix_error_with_allocated_filename(name);
+		return posix_error_with_allocated_filename(oname);
 	}
 	if ((d = PyList_New(0)) == NULL) {
 		closedir(dirp);
-		PyMem_Free(name);
+		release_bytes(oname);
 		return NULL;
 	}
 	for (;;) {
@@ -2429,7 +2495,7 @@ posix_listdir(PyObject *self, PyObject *args)
 			} else {
 				closedir(dirp);
 				Py_DECREF(d);
-				return posix_error_with_allocated_filename(name);
+				return posix_error_with_allocated_filename(oname);
 			}
 		}
 		if (ep->d_name[0] == '.' &&
@@ -2447,18 +2513,16 @@ posix_listdir(PyObject *self, PyObject *args)

 			w = PyUnicode_FromEncodedObject(v,
 					Py_FileSystemDefaultEncoding,
-					"strict");
-			if (w != NULL) {
-				Py_DECREF(v);
+					"utf8b");
+			Py_DECREF(v);
+			if (w != NULL)
 				v = w;
-			}
 			else {
-				/* Ignore undecodable filenames, as discussed
-				 * in issue 3187. To include these,
-				 * use getcwdb(). */
-				PyErr_Clear();
-				Py_DECREF(v);
-				continue;
+				/* Encoding failed to decode ASCII bytes.
+				   Raise exception. */
+				Py_DECREF(d);
+				d = NULL;
+				break;
 			}
 		}
 		if (PyList_Append(d, v) != 0) {
@@ -2470,7 +2534,7 @@ posix_listdir(PyObject *self, PyObject *args)
 		Py_DECREF(v);
 	}
 	closedir(dirp);
-	PyMem_Free(name);
+	release_bytes(oname);

 	return d;

@@ -2482,10 +2546,8 @@ posix_listdir(PyObject *self, PyObject *args)
 static PyObject *
 posix__getfullpathname(PyObject *self, PyObject *args)
 {
-	/* assume encoded strings won't more than double no of chars */
-	char inbuf[MAX_PATH*2];
-	char *inbufp = inbuf;
-	Py_ssize_t insize = sizeof(inbuf);
+	PyObject *opath;
+	char *path;
 	char outbuf[MAX_PATH*2];
 	char *temp;
 #ifdef Py_WIN_WIDE_FILENAMES
@@ -2519,13 +2581,17 @@ posix__getfullpathname(PyObject *self, PyObject *args)
 		PyErr_Clear();
 	}
 #endif
-	if (!PyArg_ParseTuple (args, "et#:_getfullpathname",
-	                       Py_FileSystemDefaultEncoding, &inbufp,
-	                       &insize))
+	if (!PyArg_ParseTuple (args, "O&:_getfullpathname",
+	                       PyUnicode_FSConverter, &opath))
 		return NULL;
-	if (!GetFullPathName(inbuf, sizeof(outbuf)/sizeof(outbuf[0]),
-	                     outbuf, &temp))
-		return win32_error("GetFullPathName", inbuf);
+	path = bytes2str(opath, 1);
+	if (!GetFullPathName(path, sizeof(outbuf)/sizeof(outbuf[0]),
+	                     outbuf, &temp)) {
+		win32_error("GetFullPathName", path);
+		release_bytes(opath);
+		return NULL;
+	}
+	release_bytes(opath);
 	if (PyUnicode_Check(PyTuple_GetItem(args, 0))) {
 		return PyUnicode_Decode(outbuf, strlen(outbuf),
 			Py_FileSystemDefaultEncoding, NULL);
@@ -2542,7 +2608,8 @@ static PyObject *
 posix_mkdir(PyObject *self, PyObject *args)
 {
 	int res;
-	char *path = NULL;
+	PyObject *opath;
+	char *path;
 	int mode = 0777;

 #ifdef Py_WIN_WIDE_FILENAMES
@@ -2563,9 +2630,10 @@ posix_mkdir(PyObject *self, PyObject *args)
 		   are also valid. */
 		PyErr_Clear();
 	}
-	if (!PyArg_ParseTuple(args, "et|i:mkdir",
-	                      Py_FileSystemDefaultEncoding, &path, &mode))
+	if (!PyArg_ParseTuple(args, "O&|i:mkdir",
+	                      PyUnicode_FSConverter, &opath, &mode))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	/* PyUnicode_AS_UNICODE OK without thread lock as
 	   it is a simple dereference. */
@@ -2573,17 +2641,18 @@ posix_mkdir(PyObject *self, PyObject *args)
 	Py_END_ALLOW_THREADS
 	if (!res) {
 		win32_error("mkdir", path);
-		PyMem_Free(path);
+		release_bytes(opath);
 		return NULL;
 	}
-	PyMem_Free(path);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 #else

-	if (!PyArg_ParseTuple(args, "et|i:mkdir",
-	                      Py_FileSystemDefaultEncoding, &path, &mode))
+	if (!PyArg_ParseTuple(args, "O&|i:mkdir",
+	                      PyUnicode_FSConverter, &opath, &mode))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 #if ( defined(__WATCOMC__) || defined(PYCC_VACPP) ) && !defined(__QNX__)
 	res = mkdir(path);
@@ -2592,8 +2661,8 @@ posix_mkdir(PyObject *self, PyObject *args)
 #endif
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path);
-	PyMem_Free(path);
+		return posix_error_with_allocated_filename(opath);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 #endif
@@ -2685,7 +2754,7 @@ error:
 	Py_INCREF(Py_None);
 	return Py_None;
 #else
-	return posix_2str(args, "etet:rename", rename);
+	return posix_2str(args, "O&O&:rename", rename);
 #endif
 }

@@ -2700,7 +2769,7 @@ posix_rmdir(PyObject *self, PyObject *args)
 #ifdef MS_WINDOWS
 	return win32_1str(args, "rmdir", "y:rmdir", RemoveDirectoryA, "U:rmdir", RemoveDirectoryW);
 #else
-	return posix_1str(args, "et:rmdir", rmdir);
+	return posix_1str(args, "O&:rmdir", rmdir);
 #endif
 }

@@ -2713,9 +2782,9 @@ static PyObject *
 posix_stat(PyObject *self, PyObject *args)
 {
 #ifdef MS_WINDOWS
-	return posix_do_stat(self, args, "et:stat", STAT, "U:stat", win32_wstat);
+	return posix_do_stat(self, args, "O&:stat", STAT, "U:stat", win32_wstat);
 #else
-	return posix_do_stat(self, args, "et:stat", STAT, NULL, NULL);
+	return posix_do_stat(self, args, "O&:stat", STAT, NULL, NULL);
 #endif
 }

@@ -2781,7 +2850,7 @@ posix_unlink(PyObject *self, PyObject *args)
 #ifdef MS_WINDOWS
 	return win32_1str(args, "remove", "y:remove", DeleteFileA, "U:remove", DeleteFileW);
 #else
-	return posix_1str(args, "et:remove", unlink);
+	return posix_1str(args, "O&:remove", unlink);
 #endif
 }

@@ -2853,7 +2922,8 @@ posix_utime(PyObject *self, PyObject *args)
 	PyObject *arg;
 	PyUnicodeObject *obwpath;
 	wchar_t *wpath = NULL;
-	char *apath = NULL;
+	PyObject *oapath;
+	char *apath;
 	HANDLE hFile;
 	long atimesec, mtimesec, ausec, musec;
 	FILETIME atime, mtime;
@@ -2875,9 +2945,10 @@ posix_utime(PyObject *self, PyObject *args)
 			PyErr_Clear();
 	}
 	if (!wpath) {
-		if (!PyArg_ParseTuple(args, "etO:utime",
-				Py_FileSystemDefaultEncoding, &apath, &arg))
+		if (!PyArg_ParseTuple(args, "O&O:utime",
+				PyUnicode_FSConverter, &oapath, &arg))
 			return NULL;
+		apath = bytes2str(oapath, 1);
 		Py_BEGIN_ALLOW_THREADS
 		hFile = CreateFileA(apath, FILE_WRITE_ATTRIBUTES, 0,
 				    NULL, OPEN_EXISTING,
@@ -2885,10 +2956,10 @@ posix_utime(PyObject *self, PyObject *args)
 		Py_END_ALLOW_THREADS
 		if (hFile == INVALID_HANDLE_VALUE) {
 			win32_error("utime", apath);
-			PyMem_Free(apath);
+			release_bytes(oapath);
 			return NULL;
 		}
-		PyMem_Free(apath);
+		release_bytes(oapath);
 	}
 	
 	if (arg == Py_None) {
@@ -2929,7 +3000,8 @@ done:
 	return result;
 #else /* Py_WIN_WIDE_FILENAMES */

-	char *path = NULL;
+	PyObject *opath;
+	char *path;
 	long atime, mtime, ausec, musec;
 	int res;
 	PyObject* arg;
@@ -2952,9 +3024,10 @@ done:
 #endif /* HAVE_UTIMES */


-	if (!PyArg_ParseTuple(args, "etO:utime",
-				  Py_FileSystemDefaultEncoding, &path, &arg))
+	if (!PyArg_ParseTuple(args, "O&O:utime",
+				  PyUnicode_FSConverter, &opath, &arg))
 		return NULL;
+	path = bytes2str(opath, 1);
 	if (arg == Py_None) {
 		/* optional time values not given */
 		Py_BEGIN_ALLOW_THREADS
@@ -2964,18 +3037,18 @@ done:
 	else if (!PyTuple_Check(arg) || PyTuple_Size(arg) != 2) {
 		PyErr_SetString(PyExc_TypeError,
 				"utime() arg 2 must be a tuple (atime, mtime)");
-		PyMem_Free(path);
+		release_bytes(opath);
 		return NULL;
 	}
 	else {
 		if (extract_time(PyTuple_GET_ITEM(arg, 0),
 				 &atime, &ausec) == -1) {
-			PyMem_Free(path);
+			release_bytes(opath);
 			return NULL;
 		}
 		if (extract_time(PyTuple_GET_ITEM(arg, 1),
 				 &mtime, &musec) == -1) {
-			PyMem_Free(path);
+			release_bytes(opath);
 			return NULL;
 		}
 		ATIME = atime;
@@ -2993,9 +3066,9 @@ done:
 #endif /* HAVE_UTIMES */
 	}
 	if (res < 0) {
-		return posix_error_with_allocated_filename(path);
+		return posix_error_with_allocated_filename(opath);
 	}
-	PyMem_Free(path);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 #undef UTIME_ARG
@@ -3030,6 +3103,31 @@ free_string_array(char **array, Py_ssize_t count)
 		PyMem_Free(array[i]);
 	PyMem_DEL(array);
 }
+
+/* Convert `o` with PyUnicode_FSConverter and store a PyMem_Malloc'ed,
+   NUL-terminated copy of the resulting bytes in *out.  Returns 1 on
+   success and 0 on failure.  On success the caller owns *out and must
+   release it with PyMem_Free (free_string_array does this). */
+int fsconvert_strdup(PyObject *o, char**out)
+{
+	PyObject *bytes;
+	Py_ssize_t size;
+	if (!PyUnicode_FSConverter(o, &bytes))
+		return 0;
+	size = PyObject_Size(bytes);
+	*out = PyMem_Malloc(size+1);
+	if (!*out) {
+		/* Drop the converter's reference and set MemoryError, so
+		   callers that only propagate the failure see an exception. */
+		Py_DECREF(bytes);
+		PyErr_NoMemory();
+		return 0;
+	}
+	/* Don't lock bytes, as we hold the GIL */
+	memcpy(*out, bytes2str(bytes, 0), size+1);
+	Py_DECREF(bytes);
+	return 1;
+}
 #endif


@@ -3044,6 +3133,7 @@ Execute an executable path with arguments, replacing current process.\n\
 static PyObject *
 posix_execv(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	PyObject *argv;
 	char **argvlist;
@@ -3053,10 +3143,11 @@ posix_execv(PyObject *self, PyObject *args)
 	/* execv has two arguments: (path, argv), where
 	   argv is a list or tuple of strings. */

-	if (!PyArg_ParseTuple(args, "etO:execv",
-                              Py_FileSystemDefaultEncoding,
-                              &path, &argv))
+	if (!PyArg_ParseTuple(args, "O&O:execv",
+                              PyUnicode_FSConverter,
+                              &opath, &argv))
 		return NULL;
+	path = bytes2str(opath, 1);
 	if (PyList_Check(argv)) {
 		argc = PyList_Size(argv);
 		getitem = PyList_GetItem;
@@ -3067,28 +3158,27 @@ posix_execv(PyObject *self, PyObject *args)
 	}
 	else {
 		PyErr_SetString(PyExc_TypeError, "execv() arg 2 must be a tuple or list");
-                PyMem_Free(path);
+                release_bytes(opath);
 		return NULL;
 	}
 	if (argc < 1) {
 		PyErr_SetString(PyExc_ValueError, "execv() arg 2 must not be empty");
-                PyMem_Free(path);
+                release_bytes(opath);
 		return NULL;
 	}

 	argvlist = PyMem_NEW(char *, argc+1);
 	if (argvlist == NULL) {
-		PyMem_Free(path);
+		release_bytes(opath);
 		return PyErr_NoMemory();
 	}
 	for (i = 0; i < argc; i++) {
-		if (!PyArg_Parse((*getitem)(argv, i), "et",
-				 Py_FileSystemDefaultEncoding,
-				 &argvlist[i])) {
+		if (!fsconvert_strdup((*getitem)(argv, i),
+				      &argvlist[i])) {
 			free_string_array(argvlist, i);
 			PyErr_SetString(PyExc_TypeError,
 					"execv() arg 2 must contain only strings");
-			PyMem_Free(path);
+			release_bytes(opath);
 			return NULL;

 		}
@@ -3100,7 +3190,7 @@ posix_execv(PyObject *self, PyObject *args)
 	/* If we get here it's definitely an error */

 	free_string_array(argvlist, argc);
-	PyMem_Free(path);
+	release_bytes(opath);
 	return posix_error();
 }

@@ -3116,6 +3206,7 @@ Execute a path with arguments and environment, replacing current process.\n\
 static PyObject *
 posix_execve(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	PyObject *argv, *env;
 	char **argvlist;
@@ -3129,10 +3220,11 @@ posix_execve(PyObject *self, PyObject *args)
 	   argv is a list or tuple of strings and env is a dictionary
 	   like posix.environ. */

-	if (!PyArg_ParseTuple(args, "etOO:execve",
-			      Py_FileSystemDefaultEncoding,
-			      &path, &argv, &env))
+	if (!PyArg_ParseTuple(args, "O&OO:execve",
+			      PyUnicode_FSConverter,
+			      &opath, &argv, &env))
 		return NULL;
+	path = bytes2str(opath, 1);
 	if (PyList_Check(argv)) {
 		argc = PyList_Size(argv);
 		getitem = PyList_GetItem;
@@ -3158,10 +3250,8 @@ posix_execve(PyObject *self, PyObject *args)
 		goto fail_0;
 	}
 	for (i = 0; i < argc; i++) {
-		if (!PyArg_Parse((*getitem)(argv, i),
-				 "et;execve() arg 2 must contain only strings",
-				 Py_FileSystemDefaultEncoding,
-				 &argvlist[i]))
+		if (!fsconvert_strdup((*getitem)(argv, i),
+				      &argvlist[i]))
 		{
 			lastarg = i;
 			goto fail_1;
@@ -3243,7 +3333,7 @@ posix_execve(PyObject *self, PyObject *args)
 	Py_XDECREF(vals);
 	Py_XDECREF(keys);
  fail_0:
-	PyMem_Free(path);
+	release_bytes(opath);
 	return NULL;
 }
 #endif /* HAVE_EXECV */
@@ -3261,6 +3351,7 @@ Execute the program 'path' in a new process.\n\
 static PyObject *
 posix_spawnv(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	PyObject *argv;
 	char **argvlist;
@@ -3272,10 +3363,11 @@ posix_spawnv(PyObject *self, PyObject *args)
 	/* spawnv has three arguments: (mode, path, argv), where
 	   argv is a list or tuple of strings. */

-	if (!PyArg_ParseTuple(args, "ietO:spawnv", &mode,
-			      Py_FileSystemDefaultEncoding,
-			      &path, &argv))
+	if (!PyArg_ParseTuple(args, "iO&O:spawnv", &mode,
+			      PyUnicode_FSConverter,
+			      &opath, &argv))
 		return NULL;
+	path = bytes2str(opath, 1);
 	if (PyList_Check(argv)) {
 		argc = PyList_Size(argv);
 		getitem = PyList_GetItem;
@@ -3287,24 +3379,23 @@ posix_spawnv(PyObject *self, PyObject *args)
 	else {
 		PyErr_SetString(PyExc_TypeError,
 				"spawnv() arg 2 must be a tuple or list");
-		PyMem_Free(path);
+		release_bytes(opath);
 		return NULL;
 	}

 	argvlist = PyMem_NEW(char *, argc+1);
 	if (argvlist == NULL) {
-		PyMem_Free(path);
+		release_bytes(opath);
 		return PyErr_NoMemory();
 	}
 	for (i = 0; i < argc; i++) {
-		if (!PyArg_Parse((*getitem)(argv, i), "et",
-				 Py_FileSystemDefaultEncoding,
-				 &argvlist[i])) {
+		if (!fsconvert_strdup((*getitem)(argv, i),
+				      &argvlist[i])) {
 			free_string_array(argvlist, i);
 			PyErr_SetString(
 				PyExc_TypeError,
 				"spawnv() arg 2 must contain only strings");
-			PyMem_Free(path);
+			release_bytes(opath);
 			return NULL;
 		}
 	}
@@ -3324,7 +3415,7 @@ posix_spawnv(PyObject *self, PyObject *args)
 #endif

 	free_string_array(argvlist, argc);
-	PyMem_Free(path);
+	release_bytes(opath);

 	if (spawnval == -1)
 		return posix_error();
@@ -3349,6 +3440,7 @@ Execute the program 'path' in a new process.\n\
 static PyObject *
 posix_spawnve(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	PyObject *argv, *env;
 	char **argvlist;
@@ -3364,10 +3456,11 @@ posix_spawnve(PyObject *self, PyObject *args)
 	   argv is a list or tuple of strings and env is a dictionary
 	   like posix.environ. */

-	if (!PyArg_ParseTuple(args, "ietOO:spawnve", &mode,
-			      Py_FileSystemDefaultEncoding,
-			      &path, &argv, &env))
+	if (!PyArg_ParseTuple(args, "iO&OO:spawnve", &mode,
+			      PyUnicode_FSConverter,
+			      &opath, &argv, &env))
 		return NULL;
+	path = bytes2str(opath, 1);
 	if (PyList_Check(argv)) {
 		argc = PyList_Size(argv);
 		getitem = PyList_GetItem;
@@ -3393,10 +3486,8 @@ posix_spawnve(PyObject *self, PyObject *args)
 		goto fail_0;
 	}
 	for (i = 0; i < argc; i++) {
-		if (!PyArg_Parse((*getitem)(argv, i),
-			     "et;spawnve() arg 2 must contain only strings",
-				 Py_FileSystemDefaultEncoding,
-				 &argvlist[i]))
+		if (!fsconvert_strdup((*getitem)(argv, i),
+				      &argvlist[i]))
 		{
 			lastarg = i;
 			goto fail_1;
@@ -3486,7 +3577,7 @@ posix_spawnve(PyObject *self, PyObject *args)
 	Py_XDECREF(vals);
 	Py_XDECREF(keys);
  fail_0:
-	PyMem_Free(path);
+	release_bytes(opath);
 	return res;
 }

@@ -3504,6 +3595,7 @@ search path to find the file.\n\
 static PyObject *
 posix_spawnvp(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	PyObject *argv;
 	char **argvlist;
@@ -3514,10 +3606,11 @@ posix_spawnvp(PyObject *self, PyObject *args)
 	/* spawnvp has three arguments: (mode, path, argv), where
 	   argv is a list or tuple of strings. */

-	if (!PyArg_ParseTuple(args, "ietO:spawnvp", &mode,
-			      Py_FileSystemDefaultEncoding,
-			      &path, &argv))
+	if (!PyArg_ParseTuple(args, "iO&O:spawnvp", &mode,
+			      PyUnicode_FSConverter,
+			      &opath, &argv))
 		return NULL;
+	path = bytes2str(opath, 1);
 	if (PyList_Check(argv)) {
 		argc = PyList_Size(argv);
 		getitem = PyList_GetItem;
@@ -3529,24 +3622,23 @@ posix_spawnvp(PyObject *self, PyObject *args)
 	else {
 		PyErr_SetString(PyExc_TypeError,
 				"spawnvp() arg 2 must be a tuple or list");
-		PyMem_Free(path);
+		release_bytes(opath);
 		return NULL;
 	}

 	argvlist = PyMem_NEW(char *, argc+1);
 	if (argvlist == NULL) {
-		PyMem_Free(path);
+		release_bytes(opath);
 		return PyErr_NoMemory();
 	}
 	for (i = 0; i < argc; i++) {
-		if (!PyArg_Parse((*getitem)(argv, i), "et",
-				 Py_FileSystemDefaultEncoding,
-				 &argvlist[i])) {
+		if (!fsconvert_strdup((*getitem)(argv, i),
+				      &argvlist[i])) {
 			free_string_array(argvlist, i);
 			PyErr_SetString(
 				PyExc_TypeError,
 				"spawnvp() arg 2 must contain only strings");
-			PyMem_Free(path);
+			release_bytes(opath);
 			return NULL;
 		}
 	}
@@ -3561,7 +3653,7 @@ posix_spawnvp(PyObject *self, PyObject *args)
 	Py_END_ALLOW_THREADS

 	free_string_array(argvlist, argc);
-	PyMem_Free(path);
+	release_bytes(opath);

 	if (spawnval == -1)
 		return posix_error();
@@ -3583,6 +3675,7 @@ search path to find the file.\n\
 static PyObject *
 posix_spawnvpe(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	PyObject *argv, *env;
 	char **argvlist;
@@ -3598,9 +3691,10 @@ posix_spawnvpe(PyObject *self, PyObject *args)
 	   like posix.environ. */

-	if (!PyArg_ParseTuple(args, "ietOO:spawnvpe", &mode,
-			      Py_FileSystemDefaultEncoding,
-			      &path, &argv, &env))
+	if (!PyArg_ParseTuple(args, "iO&OO:spawnvpe", &mode,
+			      PyUnicode_FSConverter,
+			      &opath, &argv, &env))
 		return NULL;
+	path = bytes2str(opath, 1);
 	if (PyList_Check(argv)) {
 		argc = PyList_Size(argv);
 		getitem = PyList_GetItem;
@@ -3626,10 +3720,8 @@ posix_spawnvpe(PyObject *self, PyObject *args)
 		goto fail_0;
 	}
 	for (i = 0; i < argc; i++) {
-		if (!PyArg_Parse((*getitem)(argv, i),
-			     "et;spawnvpe() arg 2 must contain only strings",
-				 Py_FileSystemDefaultEncoding,
-				 &argvlist[i]))
+		if (!fsconvert_strdup((*getitem)(argv, i),
+				      &argvlist[i]))
 		{
 			lastarg = i;
 			goto fail_1;
@@ -3710,7 +3802,7 @@ posix_spawnvpe(PyObject *self, PyObject *args)
 	Py_XDECREF(vals);
 	Py_XDECREF(keys);
  fail_0:
-	PyMem_Free(path);
+	release_bytes(opath);
 	return res;
 }
 #endif /* PYOS_OS2 */
@@ -4549,12 +4641,12 @@ static PyObject *
 posix_lstat(PyObject *self, PyObject *args)
 {
 #ifdef HAVE_LSTAT
-	return posix_do_stat(self, args, "et:lstat", lstat, NULL, NULL);
+	return posix_do_stat(self, args, "O&:lstat", lstat, NULL, NULL);
 #else /* !HAVE_LSTAT */
 #ifdef MS_WINDOWS
-	return posix_do_stat(self, args, "et:lstat", STAT, "U:lstat", win32_wstat);
+	return posix_do_stat(self, args, "O&:lstat", STAT, "U:lstat", win32_wstat);
 #else
-	return posix_do_stat(self, args, "et:lstat", STAT, NULL, NULL);
+	return posix_do_stat(self, args, "O&:lstat", STAT, NULL, NULL);
 #endif
 #endif /* !HAVE_LSTAT */
 }
@@ -4570,16 +4662,18 @@ posix_readlink(PyObject *self, PyObject *args)
 {
 	PyObject* v;
 	char buf[MAXPATHLEN];
+	PyObject *opath;
 	char *path;
 	int n;
 	int arg_is_unicode = 0;

-	if (!PyArg_ParseTuple(args, "et:readlink", 
-				Py_FileSystemDefaultEncoding, &path))
+	if (!PyArg_ParseTuple(args, "O&:readlink", 
+				PyUnicode_FSConverter, &opath))
 		return NULL;
+	path = bytes2str(opath, 1);
 	v = PySequence_GetItem(args, 0);
 	if (v == NULL) {
-		PyMem_Free(path);
+		release_bytes(opath);
 		return NULL;
 	}

@@ -4592,16 +4686,16 @@ posix_readlink(PyObject *self, PyObject *args)
 	n = readlink(path, buf, (int) sizeof buf);
 	Py_END_ALLOW_THREADS
 	if (n < 0)
-		return posix_error_with_allocated_filename(path);
+		return posix_error_with_allocated_filename(opath);

-	PyMem_Free(path);
+	release_bytes(opath);
 	v = PyBytes_FromStringAndSize(buf, n);
 	if (arg_is_unicode) {
 		PyObject *w;

 		w = PyUnicode_FromEncodedObject(v,
 				Py_FileSystemDefaultEncoding,
-				"strict");
+				"utf8b");
 		if (w != NULL) {
 			Py_DECREF(v);
 			v = w;
@@ -4623,7 +4717,7 @@ Create a symbolic link pointing to src named dst.");
 static PyObject *
 posix_symlink(PyObject *self, PyObject *args)
 {
-	return posix_2str(args, "etet:symlink", symlink);
+	return posix_2str(args, "O&O&:symlink", symlink);
 }
 #endif /* HAVE_SYMLINK */

@@ -4811,7 +4905,8 @@ Open a file (for low level IO).");
 static PyObject *
 posix_open(PyObject *self, PyObject *args)
 {
-	char *file = NULL;
+	PyObject *ofile;
+	char *file;
 	int flag;
 	int mode = 0777;
 	int fd;
@@ -4835,17 +4930,17 @@ posix_open(PyObject *self, PyObject *args)
 	}
 #endif

-	if (!PyArg_ParseTuple(args, "eti|i",
-	                      Py_FileSystemDefaultEncoding, &file,
+	if (!PyArg_ParseTuple(args, "O&i|i",
+	                      PyUnicode_FSConverter, &ofile,
 	                      &flag, &mode))
 		return NULL;
-
+	file = bytes2str(ofile, 1);
 	Py_BEGIN_ALLOW_THREADS
 	fd = open(file, flag, mode);
 	Py_END_ALLOW_THREADS
 	if (fd < 0)
-		return posix_error_with_allocated_filename(file);
-	PyMem_Free(file);
+		return posix_error_with_allocated_filename(ofile);
+	release_bytes(ofile);
 	return PyLong_FromLong((long)fd);
 }

@@ -5289,20 +5384,27 @@ posix_putenv(PyObject *self, PyObject *args)
        wchar_t *s1, *s2;
        wchar_t *newenv;
 #else
+	PyObject *os1, *os2;
        char *s1, *s2;
        char *newenv;
 #endif
 	PyObject *newstr;
 	size_t len;

-	if (!PyArg_ParseTuple(args,
 #ifdef MS_WINDOWS
+	if (!PyArg_ParseTuple(args,
 			      "uu:putenv",
-#else
-			      "ss:putenv",
-#endif
 			      &s1, &s2))
 		return NULL;
+#else
+	if (!PyArg_ParseTuple(args,
+			      "O&O&:putenv",
+			      PyUnicode_FSConverter, &os1, 
+			      PyUnicode_FSConverter, &os2))
+		return NULL;
+	s1 = bytes2str(os1, 1);
+	s2 = bytes2str(os2, 1);
+#endif

 #if defined(PYOS_OS2)
    if (stricmp(s1, "BEGINLIBPATH") == 0) {
@@ -5345,6 +5447,8 @@ posix_putenv(PyObject *self, PyObject *args)
 	PyOS_snprintf(newenv, len, "%s=%s", s1, s2);
 	if (putenv(newenv)) {
                Py_DECREF(newstr);
+		release_bytes(os1);
+		release_bytes(os2);
                posix_error();
                return NULL;
 	}
@@ -5364,6 +5468,10 @@ posix_putenv(PyObject *self, PyObject *args)

 #if defined(PYOS_OS2)
    }
+#endif
+#ifndef MS_WINDOWS
+	release_bytes(os1);
+	release_bytes(os2);
 #endif
 	Py_INCREF(Py_None);
        return Py_None;
@@ -6688,6 +6796,7 @@ the underlying Win32 ShellExecute function doesn't work if it is.");
 static PyObject *
 win32_startfile(PyObject *self, PyObject *args)
 {
+	PyObject *ofilepath;
 	char *filepath;
 	char *operation = NULL;
 	HINSTANCE rc;
@@ -6729,20 +6838,21 @@ win32_startfile(PyObject *self, PyObject *args)
 #endif

 normal:
-	if (!PyArg_ParseTuple(args, "et|s:startfile", 
-			      Py_FileSystemDefaultEncoding, &filepath, 
+	if (!PyArg_ParseTuple(args, "O&|s:startfile", 
+			      PyUnicode_FSConverter, &ofilepath, 
 			      &operation))
 		return NULL;
+	filepath = bytes2str(ofilepath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	rc = ShellExecute((HWND)0, operation, filepath, 
 			  NULL, NULL, SW_SHOWNORMAL);
 	Py_END_ALLOW_THREADS
 	if (rc <= (HINSTANCE)32) {
 		PyObject *errval = win32_error("startfile", filepath);
-		PyMem_Free(filepath);
+		release_bytes(ofilepath);
 		return errval;
 	}
-	PyMem_Free(filepath);
+	release_bytes(ofilepath);
 	Py_INCREF(Py_None);
 	return Py_None;
 }

--- a/Modules/python.c
+++ b/Modules/python.c
@@ -14,6 +14,99 @@ wmain(int argc, wchar_t **argv)
 	return Py_Main(argc, argv);
 }
 #else
+/* Decode the multibyte string `arg` into a newly PyMem_Malloc'ed wide
+   string.  mbstowcs is tried first; if it fails, undecodable bytes are
+   escaped as 0xdc00 + byte (UTF-8b) instead of giving up.  Returns NULL
+   after printing a message to stderr when memory runs out or mbrtowc
+   reports an incomplete character. */
+static wchar_t*
+char2wchar(char* arg)
+{
+	wchar_t *res;
+#ifdef HAVE_BROKEN_MBSTOWCS
+	/* Some platforms have a broken implementation of
+	 * mbstowcs which does not count the characters that
+	 * would result from conversion.  Use an upper bound.
+	 */
+	size_t argsize = strlen(arg);
+#else
+	size_t argsize = mbstowcs(NULL, arg, 0);
+#endif
+	size_t count;
+	unsigned char *in;
+	wchar_t *out;
+#ifdef HAVE_MBRTOWC
+	mbstate_t mbs;
+#endif
+	if (argsize != (size_t)-1) {
+		res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
+		if (!res)
+			goto oom;
+		count = mbstowcs(res, arg, argsize+1);
+		if (count != (size_t)-1)
+			return res;
+		PyMem_Free(res);
+	}
+	/* Conversion failed. Fall back to escaping with utf8b. */
+#ifdef HAVE_MBRTOWC
+	/* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
+	
+	/* Overallocate; as multi-byte characters are in the argument, the
+	   actual output could use less memory. */
+	argsize = strlen(arg) + 1;
+	res = PyMem_Malloc(argsize*sizeof(wchar_t));
+	if (!res) goto oom;
+	in = (unsigned char*)arg;
+	out = res;
+	memset(&mbs, 0, sizeof mbs);
+	while (argsize) {
+		size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
+		if (converted == 0)
+			/* Reached end of string; null char stored. */
+			break;
+		if (converted == (size_t)-2) {
+			/* Incomplete character. This should never happen,
+			   since we provide everything that we have -
+			   unless there is a bug in the C library, or I
+			   misunderstood how mbrtowc works. */
+			fprintf(stderr, "unexpected mbrtowc result -2\n");
+			PyMem_Free(res);
+			return NULL;
+		}
+		if (converted == (size_t)-1) {
+			/* Conversion error. Escape as UTF-8b, and start over
+			   in the initial shift state. */
+			*out++ = 0xdc00 + *in++;
+			argsize--;
+			memset(&mbs, 0, sizeof mbs);
+			continue;
+		}
+		/* successfully converted some bytes */
+		in += converted;
+		argsize -= converted;
+		out++;
+	}
+#else
+	/* Cannot use C locale for escaping; manually escape as if charset
+	   is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
+	   correctly in the locale's charset, which must be an ASCII superset. */
+	res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
+	if (!res) goto oom;
+	in = (unsigned char*)arg;
+	out = res;
+	while(*in)
+		if(*in < 128)
+			*out++ = *in++;
+		else
+			*out++ = 0xdc00 + *in++;
+	*out = 0;
+#endif
+	return res;
+oom:
+	fprintf(stderr, "out of memory\n");
+	return NULL;
+}
+
 int
 main(int argc, char **argv)
 {
@@ -40,31 +127,9 @@ main(int argc, char **argv)
 	oldloc = strdup(setlocale(LC_ALL, NULL));
 	setlocale(LC_ALL, "");
 	for (i = 0; i < argc; i++) {
-#ifdef HAVE_BROKEN_MBSTOWCS
-		/* Some platforms have a broken implementation of
-		 * mbstowcs which does not count the characters that
-		 * would result from conversion.  Use an upper bound.
-		 */
-		size_t argsize = strlen(argv[i]);
-#else
-		size_t argsize = mbstowcs(NULL, argv[i], 0);
-#endif
-		size_t count;
-		if (argsize == (size_t)-1) {
-			fprintf(stderr, "Could not convert argument %d to string\n", i);
+		argv_copy2[i] = argv_copy[i] = char2wchar(argv[i]);
+		if (!argv_copy[i])
 			return 1;
-		}
-		argv_copy[i] = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
-		argv_copy2[i] = argv_copy[i];
-		if (!argv_copy[i]) {
-			fprintf(stderr, "out of memory\n");
-			return 1;
-		}
-		count = mbstowcs(argv_copy[i], argv[i], argsize+1);
-		if (count == (size_t)-1) {
-			fprintf(stderr, "Could not convert argument %d to string\n", i);
-			return 1;
-		}
 	}
 	setlocale(LC_ALL, oldloc);
 	free(oldloc);

--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1530,6 +1530,53 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
    }
 }

+/* "O&" converter for PyArg_ParseTuple: store in *(PyObject**)addr a new reference to arg as bytes or bytearray
+   (other objects are encoded with the file system encoding and "utf8b"). Returns 1 on success, 0 on failure. */
+
+int
+PyUnicode_FSConverter(PyObject* arg, void* addr)
+{
+    PyObject *output = NULL;
+    Py_ssize_t size;
+    void *data;
+    if (PyBytes_Check(arg) || PyByteArray_Check(arg)) { /* already bytes-like: pass through with a new reference */
+        output = arg;
+        Py_INCREF(output);
+    }
+    else {
+        arg = PyUnicode_FromObject(arg);
+        if (!arg)
+            return 0;
+        output = PyUnicode_AsEncodedObject(arg, 
+                                           Py_FileSystemDefaultEncoding,
+                                           "utf8b");
+        Py_DECREF(arg); /* release the reference obtained from PyUnicode_FromObject */
+        if (!output)
+            return 0;
+        if (!PyBytes_Check(output)) {
+            Py_DECREF(output);
+            PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes");
+            return 0;
+        }
+    }
+    if (PyBytes_Check(output)) {
+         size = PyBytes_GET_SIZE(output);
+         data = PyBytes_AS_STRING(output);
+    } 
+    else {
+         size = PyByteArray_GET_SIZE(output);
+         data = PyByteArray_AS_STRING(output);
+    }
+    if (size != strlen(data)) { /* C string length differs from object size: a NUL byte is embedded */
+        PyErr_SetString(PyExc_TypeError, "embedded NUL character");
+        Py_DECREF(output);
+        return 0;
+    }
+    *(PyObject**)addr = output; /* hand the reference to the caller */
+    return 1;
+}
+
+
 char*
 _PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
 {
@@ -4154,11 +4201,26 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
                                                               collstart-startp, collend-startp, &newpos);
                 if (repunicode == NULL)
                     goto onError;
-                if (!PyUnicode_Check(repunicode)) {
-                    /* Implementation limitation: byte results not supported yet. */
-                    PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
+                if (PyBytes_Check(repunicode)) {
+                    /* Directly copy bytes result to output. */
+                    repsize = PyBytes_Size(repunicode);
+                    if (repsize > 1) {
+                        /* Make room for all additional bytes.  The resize
+                           may move the buffer, so remember the write offset
+                           and re-derive str from the resized object. */
+                        Py_ssize_t stroffset = str - PyBytes_AS_STRING(res);
+                        if (_PyBytes_Resize(&res, ressize+repsize-1)) {
+                            Py_DECREF(repunicode);
+                            goto onError;
+                        }
+                        str = PyBytes_AS_STRING(res) + stroffset;
+                        ressize += repsize-1;
+                    }
+                    memcpy(str, PyBytes_AsString(repunicode), repsize);
+                    str += repsize;
+                    p = startp + newpos;
                     Py_DECREF(repunicode);
-                    goto onError;
+                    break;
                 }
                 /* need more space? (at least enough for what we
                    have+the replacement+the rest of the string, so
@@ -5123,11 +5181,24 @@ int charmap_encoding_error(
                                                      collstartpos, collendpos, &newpos);
        if (repunicode == NULL)
            return -1;
-        if (!PyUnicode_Check(repunicode)) {
-            /* Implementation limitation: byte results not supported yet. */
-            PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
+        if (PyBytes_Check(repunicode)) {
+            /* Directly copy bytes result to output. */
+            Py_ssize_t outsize = PyBytes_Size(*res);
+            Py_ssize_t requiredsize;
+            repsize = PyBytes_Size(repunicode);
+            requiredsize = *respos + repsize;
+            if (requiredsize > outsize)
+                /* Make room for all additional bytes. */
+                if (charmapencode_resize(res, respos, requiredsize)) {
+                    Py_DECREF(repunicode);
+                    return -1;
+                }
+            memcpy(PyBytes_AsString(*res) + *respos,
+                   PyBytes_AsString(repunicode),  repsize);
+            *respos += repsize;
+            *inpos = newpos;
            Py_DECREF(repunicode);
-            return -1;
+            break;
        }
        /* generate replacement  */
        repsize = PyUnicode_GET_SIZE(repunicode);
@@ -5691,7 +5762,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
            if (repunicode == NULL)
                goto onError;
            if (!PyUnicode_Check(repunicode)) {
-                /* Implementation limitation: byte results not supported yet. */
+                /* Byte results not supported, since they have no decimal property. */
                PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
                Py_DECREF(repunicode);
                goto onError;

--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -829,6 +829,82 @@ PyCodec_SurrogateErrors(PyObject *exc)
    }
 }

+static PyObject *
+PyCodec_UTF8bErrors(PyObject *exc) /* "utf8b" error handler: returns a (replacement, resume position) tuple, or NULL on failure */
+{
+    PyObject *restuple;
+    PyObject *object;
+    Py_ssize_t start;
+    Py_ssize_t end;
+    PyObject *res;
+    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { /* encoding: turn U+DC80..U+DCFF back into bytes 0x80..0xFF */
+	Py_UNICODE *p;
+	Py_UNICODE *startp;
+	char *outp;
+	if (PyUnicodeEncodeError_GetStart(exc, &start))
+	    return NULL;
+	if (PyUnicodeEncodeError_GetEnd(exc, &end))
+	    return NULL;
+	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
+	    return NULL;
+	startp = PyUnicode_AS_UNICODE(object);
+	res = PyBytes_FromStringAndSize(NULL, end-start); /* one output byte per offending code unit */
+	if (!res) {
+	    Py_DECREF(object);
+	    return NULL;
+	}
+	outp = PyBytes_AsString(res);
+	for (p = startp+start; p < startp+end; p++) {
+	    Py_UNICODE ch = *p;
+	    if (ch < 0xdc80 || ch > 0xdcff) {
+		/* Not a UTF-8b surrogate, fail with original exception */
+		PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+		Py_DECREF(res);
+		Py_DECREF(object);
+		return NULL;
+	    }
+	    *outp++ = ch - 0xdc00;
+	}
+	restuple = Py_BuildValue("(On)", res, end); /* (bytes, end of the failing range) */
+	Py_DECREF(res);
+	Py_DECREF(object);
+	return restuple;
+    }
+    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { /* decoding: map bad bytes >= 128 to 0xdc00 + byte */
+	unsigned char *p;
+	Py_UNICODE ch[4]; /* decode up to 4 bad bytes. */
+	int consumed = 0;
+	if (PyUnicodeDecodeError_GetStart(exc, &start))
+	    return NULL;
+	if (PyUnicodeDecodeError_GetEnd(exc, &end))
+	    return NULL;
+	if (!(object = PyUnicodeDecodeError_GetObject(exc)))
+	    return NULL;
+	if (!(p = (unsigned char*)PyBytes_AsString(object))) {
+	    Py_DECREF(object);
+	    return NULL;
+	}
+	while (consumed < 4 && consumed < end-start) {
+	    /* Refuse to escape ASCII bytes. */
+	    if (p[start+consumed] < 128)
+		break;
+	    ch[consumed] = 0xdc00 + p[start+consumed];
+	    consumed++;
+	}
+	Py_DECREF(object);
+	if (!consumed) {
+	    /* codec complained about ASCII byte. */
+	    PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+	    return NULL;
+	}	    
+	return Py_BuildValue("(u#n)", ch, consumed, start+consumed); /* (escaped code units, position after the consumed bytes) */
+    }
+    else {
+	wrong_exception_type(exc); /* neither an encode nor a decode error */
+	return NULL;
+    }
+}
+
 	
 static PyObject *strict_errors(PyObject *self, PyObject *exc)
 {
@@ -864,6 +940,11 @@ static PyObject *surrogates_errors(PyObject *self, PyObject *exc)
    return PyCodec_SurrogateErrors(exc);
 }

+static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
+{
+    return PyCodec_UTF8bErrors(exc); /* thin wrapper: self is unused, exc is forwarded unchanged */
+}
+
 static int _PyCodecRegistry_Init(void)
 {
    static struct {
@@ -918,6 +999,14 @@ static int _PyCodecRegistry_Init(void)
 		surrogates_errors,
 		METH_O
 	    }
+	},
+	{
+	    "utf8b",
+	    {
+		"utf8b",
+		utf8b_errors,
+		METH_O
+	    }
 	}
    };


--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -262,6 +262,22 @@ Py_InitializeEx(int install_sigs)

 	_PyImportHooks_Init();

+#if defined(HAVE_LANGINFO_H) && defined(CODESET)
+	/* On Unix, set the file system encoding according to the
+	   user's preference, if the CODESET names a well-known
+	   Python codec, and Py_FileSystemDefaultEncoding isn't
+	   initialized by other means. Also set the encoding of
+	   stdin and stdout if these are terminals.  */
+
+	codeset = get_codeset();
+	if (codeset) {
+		if (!Py_FileSystemDefaultEncoding)
+			Py_FileSystemDefaultEncoding = codeset;
+		else
+			free(codeset);
+	}
+#endif
+
 	if (install_sigs)
 		initsigs(); /* Signal handling stuff, including initintr() */
 		
@@ -285,22 +301,6 @@ Py_InitializeEx(int install_sigs)
 #ifdef WITH_THREAD
 	_PyGILState_Init(interp, tstate);
 #endif /* WITH_THREAD */
-
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
-	/* On Unix, set the file system encoding according to the
-	   user's preference, if the CODESET names a well-known
-	   Python codec, and Py_FileSystemDefaultEncoding isn't
-	   initialized by other means. Also set the encoding of
-	   stdin and stdout if these are terminals.  */
-
-	codeset = get_codeset();
-	if (codeset) {
-		if (!Py_FileSystemDefaultEncoding)
-			Py_FileSystemDefaultEncoding = codeset;
-		else
-			free(codeset);
-	}
-#endif
 }

 void

--- a/configure
+++ b/configure
 #! /bin/sh
-# From configure.in Revision: 71731 .
+# From configure.in Revision: 72144 .
 # Guess values for system-dependent variables and create Makefiles.
 # Generated by GNU Autoconf 2.61 for python 3.1.
 #
@@ -16297,13 +16297,14 @@ echo "${ECHO_T}MACHDEP_OBJS" >&6; }



+


 for ac_func in alarm setitimer getitimer bind_textdomain_codeset chown \
 clock confstr ctermid execv fchmod fchown fork fpathconf ftime ftruncate \
 gai_strerror getgroups getlogin getloadavg getpeername getpgid getpid \
 getpriority getpwent getspnam getspent getsid getwd \
- kill killpg lchmod lchown lstat mkfifo mknod mktime \
+ kill killpg lchmod lchown lstat mbrtowc mkfifo mknod mktime \
 mremap nice pathconf pause plock poll pthread_init \
 putenv readlink realpath \
 select sem_open sem_timedwait sem_getvalue sem_unlink setegid seteuid \

--- a/configure.in
+++ b/configure.in
@@ -2403,7 +2403,7 @@ AC_CHECK_FUNCS(alarm setitimer getitimer bind_textdomain_codeset chown \
 clock confstr ctermid execv fchmod fchown fork fpathconf ftime ftruncate \
 gai_strerror getgroups getlogin getloadavg getpeername getpgid getpid \
 getpriority getpwent getspnam getspent getsid getwd \
- kill killpg lchmod lchown lstat mkfifo mknod mktime \
+ kill killpg lchmod lchown lstat mbrtowc mkfifo mknod mktime \
 mremap nice pathconf pause plock poll pthread_init \
 putenv readlink realpath \
 select sem_open sem_timedwait sem_getvalue sem_unlink setegid seteuid \

--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -419,6 +419,9 @@
 /* Define this if you have the makedev macro. */
 #undef HAVE_MAKEDEV

+/* Define to 1 if you have the `mbrtowc' function. */
+#undef HAVE_MBRTOWC
+
 /* Define to 1 if you have the `memmove' function. */
 #undef HAVE_MEMMOVE