Commit 5c958879 authored by Victor Stinner

Create os.fsdecode(): decode from the filesystem encoding with surrogateescape

error handler, or strict error handler on Windows.

 * Rewrite os.fsencode() documentation
 * Improve os.fsencode and os.fsdecode() tests using the new PYTHONFSENCODING
   environment variable
parent 251a1566
......@@ -155,13 +155,26 @@ process and user.
These functions are described in :ref:`os-file-dir`.
.. function:: fsencode(value)
.. function:: fsencode(filename)
Encode *value* to bytes for use in the file system, environment variables or
the command line. Use :func:`sys.getfilesystemencoding` and
``'surrogateescape'`` error handler for strings and return bytes unchanged.
On Windows, use ``'strict'`` error handler for strings if the file system
encoding is ``'mbcs'`` (which is the default encoding).
Encode *filename* to the filesystem encoding with ``'surrogateescape'``
error handler, return :class:`bytes` unchanged. On Windows, use ``'strict'``
error handler if the filesystem encoding is ``'mbcs'`` (which is the default
encoding).
:func:`fsdecode` is the reverse function.
.. versionadded:: 3.2
.. function:: fsdecode(filename)
Decode *filename* from the filesystem encoding with ``'surrogateescape'``
error handler, return :class:`str` unchanged. On Windows, use ``'strict'``
error handler if the filesystem encoding is ``'mbcs'`` (which is the default
encoding).
:func:`fsencode` is the reverse function.
.. versionadded:: 3.2
......
......@@ -237,13 +237,16 @@ Major performance enhancements have been added:
* Stub
Unicode
=======
Filenames and unicode
=====================
The filesystem encoding can be specified by setting the
:envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
The :mod:`os` module has two new functions: :func:`os.fsencode` and
:func:`os.fsdecode`.
IDLE
====
......
......@@ -402,8 +402,7 @@ def get_exec_path(env=None):
path_list = path_listb
if path_list is not None and isinstance(path_list, bytes):
path_list = path_list.decode(sys.getfilesystemencoding(),
'surrogateescape')
path_list = fsdecode(path_list)
if path_list is None:
path_list = defpath
......@@ -536,19 +535,39 @@ if supports_bytes_environ:
__all__.extend(("environb", "getenvb"))
def fsencode(value):
"""Encode value for use in the file system, environment variables
or the command line."""
if isinstance(value, bytes):
return value
elif isinstance(value, str):
def fsencode(filename):
    """
    Return filename as bytes suitable for the filesystem.

    bytes are returned unchanged.  str is encoded with the filesystem
    encoding (sys.getfilesystemencoding()) using the 'surrogateescape' error
    handler, except when that encoding is 'mbcs' (the default on Windows),
    in which case the 'strict' error handler is used.

    Raise TypeError if filename is neither bytes nor str.
    """
    if isinstance(filename, bytes):
        return filename
    if not isinstance(filename, str):
        raise TypeError("expect bytes or str, not %s" % type(filename).__name__)

    fs_encoding = sys.getfilesystemencoding()
    # 'strict' is also the default error handler of str.encode().
    errors = 'strict' if fs_encoding == 'mbcs' else 'surrogateescape'
    return filename.encode(fs_encoding, errors)
def fsdecode(filename):
    """
    Decode filename from the filesystem encoding with 'surrogateescape' error
    handler, return str unchanged. On Windows, use 'strict' error handler if
    the file system encoding is 'mbcs' (which is the default encoding).

    Raise TypeError if filename is neither str nor bytes.
    """
    if isinstance(filename, str):
        return filename
    elif isinstance(filename, bytes):
        encoding = sys.getfilesystemencoding()
        if encoding == 'mbcs':
            # No error handler given: bytes.decode() defaults to 'strict'.
            return filename.decode(encoding)
        else:
            return filename.decode(encoding, 'surrogateescape')
    else:
        raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
def _exists(name):
return name in globals()
......
......@@ -897,14 +897,6 @@ if sys.platform != 'win32':
class Pep383Tests(unittest.TestCase):
def setUp(self):
def fsdecode(filename):
encoding = sys.getfilesystemencoding()
if encoding == 'mbcs':
errors = 'strict'
else:
errors = 'surrogateescape'
return filename.decode(encoding, errors)
if support.TESTFN_UNENCODABLE:
self.dir = support.TESTFN_UNENCODABLE
else:
......@@ -930,7 +922,7 @@ if sys.platform != 'win32':
for fn in bytesfn:
f = open(os.path.join(self.bdir, fn), "w")
f.close()
fn = fsdecode(fn)
fn = os.fsdecode(fn)
if fn in self.unicodefn:
raise ValueError("duplicate filename")
self.unicodefn.add(fn)
......@@ -1139,12 +1131,43 @@ class Win32SymlinkTests(unittest.TestCase):
self.assertNotEqual(os.lstat(link), os.stat(link))
class MiscTests(unittest.TestCase):
class FSEncodingTests(unittest.TestCase):
    """Tests for os.fsencode() and os.fsdecode()."""

    def test_nop(self):
        # fsencode() returns bytes unchanged, fsdecode() returns str unchanged.
        self.assertEqual(os.fsencode(b'abc\xff'), b'abc\xff')
        self.assertEqual(os.fsdecode('abc\u0141'), 'abc\u0141')

    @unittest.skipIf(os.name == "nt", "POSIX specific test")
    def test_fsencode(self):
        self.assertEqual(os.fsencode(b'ab\xff'), b'ab\xff')
        # A lone surrogate U+DCFF is expected to encode back to byte 0xff.
        self.assertEqual(os.fsencode('ab\uDCFF'), b'ab\xff')

    def test_identity(self):
        # assert fsdecode(fsencode(x)) == x
        for fn in ('unicode\u0141', 'latin\xe9', 'ascii'):
            try:
                bytesfn = os.fsencode(fn)
            except UnicodeEncodeError:
                # Name is not encodable with the current filesystem
                # encoding: nothing to round-trip.
                continue
            self.assertEqual(os.fsdecode(bytesfn), fn)

    def get_output(self, fs_encoding, func):
        """Run ``print(<func>, end="")`` in a child interpreter.

        The child is started with PYTHONFSENCODING set to *fs_encoding* and
        PYTHONIOENCODING set to utf-8.  *func* is the source text of an
        expression evaluated after ``import os``.  Return the child's stdout
        decoded from UTF-8; the child must exit with status 0.
        """
        env = os.environ.copy()
        env['PYTHONIOENCODING'] = 'utf-8'
        env['PYTHONFSENCODING'] = fs_encoding
        code = 'import os; print(%s, end="")' % func
        process = subprocess.Popen(
            [sys.executable, "-c", code],
            stdout=subprocess.PIPE, env=env)
        # stderr is not piped, so the second item is always None.
        stdout, stderr = process.communicate()
        self.assertEqual(process.returncode, 0)
        return stdout.decode('utf-8')

    def test_encodings(self):
        def check(encoding, bytesfn, unicodefn):
            # Compare repr() strings: the values come back as text printed
            # by the child interpreter.
            encoded = self.get_output(encoding, 'repr(os.fsencode(%a))' % unicodefn)
            self.assertEqual(encoded, repr(bytesfn))

            decoded = self.get_output(encoding, 'repr(os.fsdecode(%a))' % bytesfn)
            self.assertEqual(decoded, repr(unicodefn))

        check('ascii', b'abc\xff', 'abc\udcff')
        check('utf-8', b'\xc3\xa9\x80', '\xe9\udc80')
        check('iso-8859-15', b'\xef\xa4', '\xef\u20ac')
def test_main():
......@@ -1163,7 +1186,7 @@ def test_main():
Pep383Tests,
Win32KillTests,
Win32SymlinkTests,
MiscTests,
FSEncodingTests,
)
if __name__ == "__main__":
......
......@@ -116,6 +116,9 @@ Extensions
Library
-------
- Create os.fsdecode(): decode from the filesystem encoding with
surrogateescape error handler, or strict error handler on Windows.
- Issue #3488: Provide convenient shorthand functions ``gzip.compress``
and ``gzip.decompress``. Original patch by Anand B. Pillai.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment