Commit d86accdc authored by Victor Stinner

Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode
filenames and enable os.fsencode().
parent 7dc41b67
...@@ -159,10 +159,10 @@ process and user. ...@@ -159,10 +159,10 @@ process and user.
.. function:: fsencode(value) .. function:: fsencode(value)
Encode *value* to bytes for use in the file system, environment variables or Encode *value* to bytes for use in the file system, environment variables or
the command line. Uses :func:`sys.getfilesystemencoding` and the command line. Use :func:`sys.getfilesystemencoding` and
``'surrogateescape'`` error handler for strings and returns bytes unchanged. ``'surrogateescape'`` error handler for strings and return bytes unchanged.
On Windows, use ``'strict'`` error handler for strings if the file system
Availability: Unix. encoding is ``'mbcs'`` (which is the default encoding).
.. versionadded:: 3.2 .. versionadded:: 3.2
......
...@@ -533,16 +533,19 @@ if supports_bytes_environ: ...@@ -533,16 +533,19 @@ if supports_bytes_environ:
return environb.get(key, default) return environb.get(key, default)
__all__.append("getenvb") __all__.append("getenvb")
if name != 'nt': def fsencode(value):
def fsencode(value): """Encode value for use in the file system, environment variables
"""Encode value for use in the file system, environment variables or the command line."""
or the command line.""" if isinstance(value, bytes):
if isinstance(value, bytes): return value
return value elif isinstance(value, str):
elif isinstance(value, str): encoding = sys.getfilesystemencoding()
return value.encode(sys.getfilesystemencoding(), 'surrogateescape') if encoding == 'mbcs':
return value.encode(encoding)
else: else:
raise TypeError("expect bytes or str, not %s" % type(value).__name__) return value.encode(encoding, 'surrogateescape')
else:
raise TypeError("expect bytes or str, not %s" % type(value).__name__)
def _exists(name): def _exists(name):
return name in globals() return name in globals()
......
...@@ -33,16 +33,15 @@ else: ...@@ -33,16 +33,15 @@ else:
HOST = support.HOST HOST = support.HOST
data_file = lambda name: os.path.join(os.path.dirname(__file__), name) data_file = lambda name: os.path.join(os.path.dirname(__file__), name)
fsencode = lambda name: name.encode(sys.getfilesystemencoding(), "surrogateescape")
CERTFILE = data_file("keycert.pem") CERTFILE = data_file("keycert.pem")
BYTES_CERTFILE = fsencode(CERTFILE) BYTES_CERTFILE = os.fsencode(CERTFILE)
ONLYCERT = data_file("ssl_cert.pem") ONLYCERT = data_file("ssl_cert.pem")
ONLYKEY = data_file("ssl_key.pem") ONLYKEY = data_file("ssl_key.pem")
BYTES_ONLYCERT = fsencode(ONLYCERT) BYTES_ONLYCERT = os.fsencode(ONLYCERT)
BYTES_ONLYKEY = fsencode(ONLYKEY) BYTES_ONLYKEY = os.fsencode(ONLYKEY)
CAPATH = data_file("capath") CAPATH = data_file("capath")
BYTES_CAPATH = fsencode(CAPATH) BYTES_CAPATH = os.fsencode(CAPATH)
SVN_PYTHON_ORG_ROOT_CERT = data_file("https_svn_python_org_root.pem") SVN_PYTHON_ORG_ROOT_CERT = data_file("https_svn_python_org_root.pem")
......
...@@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 1? ...@@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode
filenames and enable os.fsencode().
- Issue #8941: decoding big endian UTF-32 data in UCS-2 builds could crash - Issue #8941: decoding big endian UTF-32 data in UCS-2 builds could crash
the interpreter with characters outside the Basic Multilingual Plane the interpreter with characters outside the Basic Multilingual Plane
(higher than 0x10000). (higher than 0x10000).
......
...@@ -1478,11 +1478,17 @@ PyObject *PyUnicode_AsEncodedObject(PyObject *unicode, ...@@ -1478,11 +1478,17 @@ PyObject *PyUnicode_AsEncodedObject(PyObject *unicode,
PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode) PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
{ {
if (Py_FileSystemDefaultEncoding) if (Py_FileSystemDefaultEncoding) {
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0)
return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
NULL);
#endif
return PyUnicode_AsEncodedString(unicode, return PyUnicode_AsEncodedString(unicode,
Py_FileSystemDefaultEncoding, Py_FileSystemDefaultEncoding,
"surrogateescape"); "surrogateescape");
else } else
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode), PyUnicode_GET_SIZE(unicode),
"surrogateescape"); "surrogateescape");
...@@ -1639,7 +1645,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) ...@@ -1639,7 +1645,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
if (Py_FileSystemDefaultEncoding) { if (Py_FileSystemDefaultEncoding) {
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) { if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
return PyUnicode_DecodeMBCS(s, size, "surrogateescape"); return PyUnicode_DecodeMBCS(s, size, NULL);
} }
#elif defined(__APPLE__) #elif defined(__APPLE__)
if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) { if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {
...@@ -2745,7 +2751,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s, ...@@ -2745,7 +2751,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
#endif #endif
PyObject *errorHandler = NULL; PyObject *errorHandler = NULL;
PyObject *exc = NULL; PyObject *exc = NULL;
q = (unsigned char *)s; q = (unsigned char *)s;
e = q + size; e = q + size;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment