Commit 0185f34d authored by Serhiy Storchaka, committed by GitHub

bpo-33721: Make some os.path functions and pathlib.Path methods be tolerant...

 bpo-33721: Make some os.path functions and pathlib.Path methods be tolerant to invalid paths.  (#7695)

Such functions as os.path.exists(), os.path.lexists(), os.path.isdir(),
os.path.isfile(), os.path.islink(), and os.path.ismount() now return False
instead of raising ValueError or its subclasses UnicodeEncodeError
and UnicodeDecodeError for paths that contain characters or bytes
unrepresentable at the OS level.
parent 7bdf2826
......@@ -55,6 +55,14 @@ the :mod:`glob` module.)
* :mod:`macpath` for old-style MacOS paths
.. versionchanged:: 3.8
:func:`exists`, :func:`lexists`, :func:`isdir`, :func:`isfile`,
:func:`islink`, and :func:`ismount` now return ``False`` instead of
raising an exception for paths that contain characters or bytes
unrepresentable at the OS level.
.. function:: abspath(path)
Return a normalized absolutized version of the pathname *path*. On most
......
......@@ -638,7 +638,17 @@ Methods
Concrete paths provide the following methods in addition to pure paths
methods. Many of these methods can raise an :exc:`OSError` if a system
call fails (for example because the path doesn't exist):
call fails (for example because the path doesn't exist).
.. versionchanged:: 3.8
:meth:`~Path.exists()`, :meth:`~Path.is_dir()`, :meth:`~Path.is_file()`,
:meth:`~Path.is_mount()`, :meth:`~Path.is_symlink()`,
:meth:`~Path.is_block_device()`, :meth:`~Path.is_char_device()`,
:meth:`~Path.is_fifo()`, and :meth:`~Path.is_socket()` now return ``False``
instead of raising an exception for paths that contain characters
unrepresentable at the OS level.
.. classmethod:: Path.cwd()
......
......@@ -112,6 +112,31 @@ New Modules
Improved Modules
================
os.path
-------
:mod:`os.path` functions that return a boolean result like
:func:`~os.path.exists`, :func:`~os.path.lexists`, :func:`~os.path.isdir`,
:func:`~os.path.isfile`, :func:`~os.path.islink`, and :func:`~os.path.ismount`
now return ``False`` instead of raising :exc:`ValueError` or its subclasses
:exc:`UnicodeEncodeError` and :exc:`UnicodeDecodeError` for paths that contain
characters or bytes unrepresentable at the OS level.
(Contributed by Serhiy Storchaka in :issue:`33721`.)
pathlib
-------
:class:`pathlib.Path` methods that return a boolean result like
:meth:`~pathlib.Path.exists()`, :meth:`~pathlib.Path.is_dir()`,
:meth:`~pathlib.Path.is_file()`, :meth:`~pathlib.Path.is_mount()`,
:meth:`~pathlib.Path.is_symlink()`, :meth:`~pathlib.Path.is_block_device()`,
:meth:`~pathlib.Path.is_char_device()`, :meth:`~pathlib.Path.is_fifo()`, and
:meth:`~pathlib.Path.is_socket()` now return ``False`` instead of raising
:exc:`ValueError` or its subclass :exc:`UnicodeEncodeError` for paths that
contain characters unrepresentable at the OS level.
(Contributed by Serhiy Storchaka in :issue:`33721`.)
Optimizations
=============
......
......@@ -17,7 +17,7 @@ def exists(path):
"""Test whether a path exists. Returns False for broken symbolic links"""
try:
os.stat(path)
except OSError:
except (OSError, ValueError):
return False
return True
......@@ -28,7 +28,7 @@ def isfile(path):
"""Test whether a path is a regular file"""
try:
st = os.stat(path)
except OSError:
except (OSError, ValueError):
return False
return stat.S_ISREG(st.st_mode)
......@@ -40,7 +40,7 @@ def isdir(s):
"""Return true if the pathname refers to an existing directory."""
try:
st = os.stat(s)
except OSError:
except (OSError, ValueError):
return False
return stat.S_ISDIR(st.st_mode)
......
......@@ -138,7 +138,7 @@ def lexists(path):
try:
st = os.lstat(path)
except OSError:
except (OSError, ValueError):
return False
return True
......
......@@ -229,7 +229,7 @@ def islink(path):
"""
try:
st = os.lstat(path)
except (OSError, AttributeError):
except (OSError, ValueError, AttributeError):
return False
return stat.S_ISLNK(st.st_mode)
......@@ -239,7 +239,7 @@ def lexists(path):
"""Test whether a path exists. Returns True for broken symbolic links"""
try:
st = os.lstat(path)
except OSError:
except (OSError, ValueError):
return False
return True
......@@ -524,7 +524,7 @@ else: # use native Windows method on Windows
"""Return the absolute version of a path."""
try:
return _getfullpathname(path)
except OSError:
except (OSError, ValueError):
return _abspath_fallback(path)
# realpath is a no-op on systems without islink support
......
......@@ -1331,6 +1331,9 @@ class Path(PurePath):
if e.errno not in _IGNORED_ERROS:
raise
return False
except ValueError:
# Non-encodable path
return False
return True
def is_dir(self):
......@@ -1345,6 +1348,9 @@ class Path(PurePath):
# Path doesn't exist or is a broken symlink
# (see https://bitbucket.org/pitrou/pathlib/issue/12/)
return False
except ValueError:
# Non-encodable path
return False
def is_file(self):
"""
......@@ -1359,6 +1365,9 @@ class Path(PurePath):
# Path doesn't exist or is a broken symlink
# (see https://bitbucket.org/pitrou/pathlib/issue/12/)
return False
except ValueError:
# Non-encodable path
return False
def is_mount(self):
"""
......@@ -1392,6 +1401,9 @@ class Path(PurePath):
raise
# Path doesn't exist
return False
except ValueError:
# Non-encodable path
return False
def is_block_device(self):
"""
......@@ -1405,6 +1417,9 @@ class Path(PurePath):
# Path doesn't exist or is a broken symlink
# (see https://bitbucket.org/pitrou/pathlib/issue/12/)
return False
except ValueError:
# Non-encodable path
return False
def is_char_device(self):
"""
......@@ -1418,6 +1433,9 @@ class Path(PurePath):
# Path doesn't exist or is a broken symlink
# (see https://bitbucket.org/pitrou/pathlib/issue/12/)
return False
except ValueError:
# Non-encodable path
return False
def is_fifo(self):
"""
......@@ -1431,6 +1449,9 @@ class Path(PurePath):
# Path doesn't exist or is a broken symlink
# (see https://bitbucket.org/pitrou/pathlib/issue/12/)
return False
except ValueError:
# Non-encodable path
return False
def is_socket(self):
"""
......@@ -1444,6 +1465,9 @@ class Path(PurePath):
# Path doesn't exist or is a broken symlink
# (see https://bitbucket.org/pitrou/pathlib/issue/12/)
return False
except ValueError:
# Non-encodable path
return False
def expanduser(self):
""" Return a new path with expanded ~ and ~user constructs
......
......@@ -169,7 +169,7 @@ def islink(path):
"""Test whether a path is a symbolic link"""
try:
st = os.lstat(path)
except (OSError, AttributeError):
except (OSError, ValueError, AttributeError):
return False
return stat.S_ISLNK(st.st_mode)
......@@ -179,7 +179,7 @@ def lexists(path):
"""Test whether a path exists. Returns True for broken symbolic links"""
try:
os.lstat(path)
except OSError:
except (OSError, ValueError):
return False
return True
......@@ -191,7 +191,7 @@ def ismount(path):
"""Test whether a path is a mount point"""
try:
s1 = os.lstat(path)
except OSError:
except (OSError, ValueError):
# It doesn't exist -- so not a mount point. :-)
return False
else:
......@@ -206,7 +206,7 @@ def ismount(path):
parent = realpath(parent)
try:
s2 = os.lstat(parent)
except OSError:
except (OSError, ValueError):
return False
dev1 = s1.st_dev
......
......@@ -138,10 +138,20 @@ class GenericTest:
self.assertIs(self.pathmodule.exists(filename), True)
self.assertIs(self.pathmodule.exists(bfilename), True)
self.assertIs(self.pathmodule.exists(filename + '\udfff'), False)
self.assertIs(self.pathmodule.exists(bfilename + b'\xff'), False)
self.assertIs(self.pathmodule.exists(filename + '\x00'), False)
self.assertIs(self.pathmodule.exists(bfilename + b'\x00'), False)
if self.pathmodule is not genericpath:
self.assertIs(self.pathmodule.lexists(filename), True)
self.assertIs(self.pathmodule.lexists(bfilename), True)
self.assertIs(self.pathmodule.lexists(filename + '\udfff'), False)
self.assertIs(self.pathmodule.lexists(bfilename + b'\xff'), False)
self.assertIs(self.pathmodule.lexists(filename + '\x00'), False)
self.assertIs(self.pathmodule.lexists(bfilename + b'\x00'), False)
@unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()")
def test_exists_fd(self):
r, w = os.pipe()
......@@ -158,6 +168,11 @@ class GenericTest:
self.assertIs(self.pathmodule.isdir(filename), False)
self.assertIs(self.pathmodule.isdir(bfilename), False)
self.assertIs(self.pathmodule.isdir(filename + '\udfff'), False)
self.assertIs(self.pathmodule.isdir(bfilename + b'\xff'), False)
self.assertIs(self.pathmodule.isdir(filename + '\x00'), False)
self.assertIs(self.pathmodule.isdir(bfilename + b'\x00'), False)
try:
create_file(filename)
self.assertIs(self.pathmodule.isdir(filename), False)
......@@ -178,6 +193,11 @@ class GenericTest:
self.assertIs(self.pathmodule.isfile(filename), False)
self.assertIs(self.pathmodule.isfile(bfilename), False)
self.assertIs(self.pathmodule.isfile(filename + '\udfff'), False)
self.assertIs(self.pathmodule.isfile(bfilename + b'\xff'), False)
self.assertIs(self.pathmodule.isfile(filename + '\x00'), False)
self.assertIs(self.pathmodule.isfile(bfilename + b'\x00'), False)
try:
create_file(filename)
self.assertIs(self.pathmodule.isfile(filename), True)
......@@ -298,18 +318,20 @@ class TestGenericTest(GenericTest, unittest.TestCase):
continue
func = getattr(self.pathmodule, attr)
with self.subTest(attr=attr):
try:
if attr in ('exists', 'isdir', 'isfile'):
func('/tmp\udfffabcds')
except (OSError, UnicodeEncodeError):
pass
try:
func(b'/tmp\xffabcds')
except (OSError, UnicodeDecodeError):
pass
with self.assertRaisesRegex(ValueError, 'embedded null'):
func('/tmp\x00abcds')
with self.assertRaisesRegex(ValueError, 'embedded null'):
func(b'/tmp\x00abcds')
else:
with self.assertRaises((OSError, UnicodeEncodeError)):
func('/tmp\udfffabcds')
with self.assertRaises((OSError, UnicodeDecodeError)):
func(b'/tmp\xffabcds')
with self.assertRaisesRegex(ValueError, 'embedded null'):
func('/tmp\x00abcds')
with self.assertRaisesRegex(ValueError, 'embedded null'):
func(b'/tmp\x00abcds')
# Following TestCase is not supposed to be run from test_genericpath.
# It is inherited by other test modules (macpath, ntpath, posixpath).
......
......@@ -1342,6 +1342,8 @@ class _BasePathTest(object):
self.assertIs(False, (p / 'linkA' / 'bah').exists())
self.assertIs(False, (p / 'foo').exists())
self.assertIs(False, P('/xyzzy').exists())
self.assertIs(False, P(BASE + '\udfff').exists())
self.assertIs(False, P(BASE + '\x00').exists())
def test_open_common(self):
p = self.cls(BASE)
......@@ -1866,7 +1868,9 @@ class _BasePathTest(object):
if support.can_symlink():
self.assertFalse((P / 'linkA').is_dir())
self.assertTrue((P / 'linkB').is_dir())
self.assertFalse((P/ 'brokenLink').is_dir())
self.assertFalse((P/ 'brokenLink').is_dir(), False)
self.assertIs((P / 'dirA\udfff').is_dir(), False)
self.assertIs((P / 'dirA\x00').is_dir(), False)
def test_is_file(self):
P = self.cls(BASE)
......@@ -1878,6 +1882,8 @@ class _BasePathTest(object):
self.assertTrue((P / 'linkA').is_file())
self.assertFalse((P / 'linkB').is_file())
self.assertFalse((P/ 'brokenLink').is_file())
self.assertIs((P / 'fileA\udfff').is_file(), False)
self.assertIs((P / 'fileA\x00').is_file(), False)
@only_posix
def test_is_mount(self):
......@@ -1890,6 +1896,8 @@ class _BasePathTest(object):
self.assertTrue(R.is_mount())
if support.can_symlink():
self.assertFalse((P / 'linkA').is_mount())
self.assertIs(self.cls('/\udfff').is_mount(), False)
self.assertIs(self.cls('/\x00').is_mount(), False)
def test_is_symlink(self):
P = self.cls(BASE)
......@@ -1901,6 +1909,11 @@ class _BasePathTest(object):
self.assertTrue((P / 'linkA').is_symlink())
self.assertTrue((P / 'linkB').is_symlink())
self.assertTrue((P/ 'brokenLink').is_symlink())
self.assertIs((P / 'fileA\udfff').is_file(), False)
self.assertIs((P / 'fileA\x00').is_file(), False)
if support.can_symlink():
self.assertIs((P / 'linkA\udfff').is_file(), False)
self.assertIs((P / 'linkA\x00').is_file(), False)
def test_is_fifo_false(self):
P = self.cls(BASE)
......@@ -1908,6 +1921,8 @@ class _BasePathTest(object):
self.assertFalse((P / 'dirA').is_fifo())
self.assertFalse((P / 'non-existing').is_fifo())
self.assertFalse((P / 'fileA' / 'bah').is_fifo())
self.assertIs((P / 'fileA\udfff').is_fifo(), False)
self.assertIs((P / 'fileA\x00').is_fifo(), False)
@unittest.skipUnless(hasattr(os, "mkfifo"), "os.mkfifo() required")
def test_is_fifo_true(self):
......@@ -1919,6 +1934,8 @@ class _BasePathTest(object):
self.assertTrue(P.is_fifo())
self.assertFalse(P.is_socket())
self.assertFalse(P.is_file())
self.assertIs(self.cls(BASE, 'myfifo\udfff').is_fifo(), False)
self.assertIs(self.cls(BASE, 'myfifo\x00').is_fifo(), False)
def test_is_socket_false(self):
P = self.cls(BASE)
......@@ -1926,6 +1943,8 @@ class _BasePathTest(object):
self.assertFalse((P / 'dirA').is_socket())
self.assertFalse((P / 'non-existing').is_socket())
self.assertFalse((P / 'fileA' / 'bah').is_socket())
self.assertIs((P / 'fileA\udfff').is_socket(), False)
self.assertIs((P / 'fileA\x00').is_socket(), False)
@unittest.skipUnless(hasattr(socket, "AF_UNIX"), "Unix sockets required")
def test_is_socket_true(self):
......@@ -1941,6 +1960,8 @@ class _BasePathTest(object):
self.assertTrue(P.is_socket())
self.assertFalse(P.is_fifo())
self.assertFalse(P.is_file())
self.assertIs(self.cls(BASE, 'mysock\udfff').is_socket(), False)
self.assertIs(self.cls(BASE, 'mysock\x00').is_socket(), False)
def test_is_block_device_false(self):
P = self.cls(BASE)
......@@ -1948,6 +1969,8 @@ class _BasePathTest(object):
self.assertFalse((P / 'dirA').is_block_device())
self.assertFalse((P / 'non-existing').is_block_device())
self.assertFalse((P / 'fileA' / 'bah').is_block_device())
self.assertIs((P / 'fileA\udfff').is_block_device(), False)
self.assertIs((P / 'fileA\x00').is_block_device(), False)
def test_is_char_device_false(self):
P = self.cls(BASE)
......@@ -1955,6 +1978,8 @@ class _BasePathTest(object):
self.assertFalse((P / 'dirA').is_char_device())
self.assertFalse((P / 'non-existing').is_char_device())
self.assertFalse((P / 'fileA' / 'bah').is_char_device())
self.assertIs((P / 'fileA\udfff').is_char_device(), False)
self.assertIs((P / 'fileA\x00').is_char_device(), False)
def test_is_char_device_true(self):
# Under Unix, /dev/null should generally be a char device
......@@ -1964,6 +1989,8 @@ class _BasePathTest(object):
self.assertTrue(P.is_char_device())
self.assertFalse(P.is_block_device())
self.assertFalse(P.is_file())
self.assertIs(self.cls('/dev/null\udfff').is_char_device(), False)
self.assertIs(self.cls('/dev/null\x00').is_char_device(), False)
def test_pickling_common(self):
p = self.cls(BASE, 'fileA')
......
......@@ -153,9 +153,11 @@ class PosixPathTest(unittest.TestCase):
def test_islink(self):
self.assertIs(posixpath.islink(support.TESTFN + "1"), False)
self.assertIs(posixpath.lexists(support.TESTFN + "2"), False)
with open(support.TESTFN + "1", "wb") as f:
f.write(b"foo")
self.assertIs(posixpath.islink(support.TESTFN + "1"), False)
if support.can_symlink():
os.symlink(support.TESTFN + "1", support.TESTFN + "2")
self.assertIs(posixpath.islink(support.TESTFN + "2"), True)
......@@ -164,6 +166,11 @@ class PosixPathTest(unittest.TestCase):
self.assertIs(posixpath.exists(support.TESTFN + "2"), False)
self.assertIs(posixpath.lexists(support.TESTFN + "2"), True)
self.assertIs(posixpath.islink(support.TESTFN + "\udfff"), False)
self.assertIs(posixpath.islink(os.fsencode(support.TESTFN) + b"\xff"), False)
self.assertIs(posixpath.islink(support.TESTFN + "\x00"), False)
self.assertIs(posixpath.islink(os.fsencode(support.TESTFN) + b"\x00"), False)
def test_ismount(self):
self.assertIs(posixpath.ismount("/"), True)
self.assertIs(posixpath.ismount(b"/"), True)
......@@ -177,6 +184,11 @@ class PosixPathTest(unittest.TestCase):
finally:
safe_rmdir(ABSTFN)
self.assertIs(posixpath.ismount('/\udfff'), False)
self.assertIs(posixpath.ismount(b'/\xff'), False)
self.assertIs(posixpath.ismount('/\x00'), False)
self.assertIs(posixpath.ismount(b'/\x00'), False)
@unittest.skipUnless(support.can_symlink(),
"Test requires symlink support")
def test_ismount_symlinks(self):
......
......@@ -159,13 +159,11 @@ class HelperFunctionsTests(unittest.TestCase):
# Issue 5258
pth_dir, pth_fn = self.make_pth("abc\x00def\n")
with captured_stderr() as err_out:
site.addpackage(pth_dir, pth_fn, set())
self.assertRegex(err_out.getvalue(), "line 1")
self.assertRegex(err_out.getvalue(),
re.escape(os.path.join(pth_dir, pth_fn)))
# XXX: ditto previous XXX comment.
self.assertRegex(err_out.getvalue(), 'Traceback')
self.assertRegex(err_out.getvalue(), 'ValueError')
self.assertFalse(site.addpackage(pth_dir, pth_fn, set()))
self.assertEqual(err_out.getvalue(), "")
for path in sys.path:
if isinstance(path, str):
self.assertNotIn("abc\x00def", path)
def test_addsitedir(self):
# Same tests for test_addpackage since addsitedir() essentially just
......
:mod:`os.path` functions that return a boolean result like
:func:`~os.path.exists`, :func:`~os.path.lexists`, :func:`~os.path.isdir`,
:func:`~os.path.isfile`, :func:`~os.path.islink`, and :func:`~os.path.ismount`,
and :class:`pathlib.Path` methods that return a boolean result like
:meth:`~pathlib.Path.exists()`, :meth:`~pathlib.Path.is_dir()`,
:meth:`~pathlib.Path.is_file()`, :meth:`~pathlib.Path.is_mount()`,
:meth:`~pathlib.Path.is_symlink()`, :meth:`~pathlib.Path.is_block_device()`,
:meth:`~pathlib.Path.is_char_device()`, :meth:`~pathlib.Path.is_fifo()`, and
:meth:`~pathlib.Path.is_socket()` now return ``False`` instead of raising
:exc:`ValueError` or its subclasses :exc:`UnicodeEncodeError` and
:exc:`UnicodeDecodeError` for paths that contain characters or bytes
unrepresentable at the OS level.
......@@ -1005,27 +1005,6 @@ PyDoc_STRVAR(os__isdir__doc__,
#define OS__ISDIR_METHODDEF \
{"_isdir", (PyCFunction)os__isdir, METH_O, os__isdir__doc__},
static PyObject *
os__isdir_impl(PyObject *module, path_t *path);
static PyObject *
os__isdir(PyObject *module, PyObject *arg)
{
PyObject *return_value = NULL;
path_t path = PATH_T_INITIALIZE("_isdir", "path", 0, 0);
if (!PyArg_Parse(arg, "O&:_isdir", path_converter, &path)) {
goto exit;
}
return_value = os__isdir_impl(module, &path);
exit:
/* Cleanup for path */
path_cleanup(&path);
return return_value;
}
#endif /* defined(MS_WINDOWS) */
#if defined(MS_WINDOWS)
......@@ -6778,4 +6757,4 @@ exit:
#ifndef OS_GETRANDOM_METHODDEF
#define OS_GETRANDOM_METHODDEF
#endif /* !defined(OS_GETRANDOM_METHODDEF) */
/*[clinic end generated code: output=0f23518dd4482e66 input=a9049054013a1b77]*/
/*[clinic end generated code: output=40cac0135f846202 input=a9049054013a1b77]*/
......@@ -3821,22 +3821,32 @@ cleanup:
/*[clinic input]
os._isdir
path: path_t
path as arg: object
/
Return true if the pathname refers to an existing directory.
[clinic start generated code]*/
static PyObject *
os__isdir_impl(PyObject *module, path_t *path)
/*[clinic end generated code: output=75f56f32720836cb input=5e0800149c0ad95f]*/
os__isdir(PyObject *module, PyObject *arg)
/*[clinic end generated code: output=404f334d85d4bf25 input=36cb6785874d479e]*/
{
DWORD attributes;
path_t path = PATH_T_INITIALIZE("_isdir", "path", 0, 0);
if (!path_converter(arg, &path)) {
if (PyErr_ExceptionMatches(PyExc_ValueError)) {
PyErr_Clear();
Py_RETURN_FALSE;
}
return NULL;
}
Py_BEGIN_ALLOW_THREADS
attributes = GetFileAttributesW(path->wide);
attributes = GetFileAttributesW(path.wide);
Py_END_ALLOW_THREADS
path_cleanup(&path);
if (attributes == INVALID_FILE_ATTRIBUTES)
Py_RETURN_FALSE;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment