Commit b2457efc authored by Victor Stinner, committed by GitHub

bpo-34523: Add _PyCoreConfig.filesystem_encoding (GH-8963)

_PyCoreConfig_Read() is now responsible for choosing the filesystem
encoding and error handler. When Py_Main() is used, the encoding is now
chosen even before Py_Initialize() is called.

_PyCoreConfig.filesystem_encoding is now the reference, instead of
Py_FileSystemDefaultEncoding, for the Python filesystem encoding.

Changes:

* Add filesystem_encoding and filesystem_errors to _PyCoreConfig
* _PyCoreConfig_Read() now reads the locale encoding for the file
  system encoding.
* PyUnicode_EncodeFSDefault() and PyUnicode_DecodeFSDefaultAndSize()
  now use the interpreter configuration rather than
  Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
  global configuration variables.
* Add _Py_SetFileSystemEncoding() and _Py_ClearFileSystemEncoding()
  private functions to only modify Py_FileSystemDefaultEncoding and
  Py_FileSystemDefaultEncodeErrors in coreconfig.c.
* _Py_CoerceLegacyLocale() now takes an int rather than
  _PyCoreConfig for the warning.
parent dfe0dc74
...@@ -66,6 +66,17 @@ typedef struct { ...@@ -66,6 +66,17 @@ typedef struct {
int coerce_c_locale; /* PYTHONCOERCECLOCALE, -1 means unknown */ int coerce_c_locale; /* PYTHONCOERCECLOCALE, -1 means unknown */
int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */ int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */
/* Python filesystem encoding and error handler: see
sys.getfilesystemencoding() and sys.getfilesystemencodeerrors().
Updated later by initfsencoding(). On Windows, can be updated by
sys._enablelegacywindowsfsencoding() at runtime.
See Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors.
*/
char *filesystem_encoding;
char *filesystem_errors;
/* Enable UTF-8 mode? /* Enable UTF-8 mode?
Set by -X utf8 command line option and PYTHONUTF8 environment variable. Set by -X utf8 command line option and PYTHONUTF8 environment variable.
If set to -1 (default), inherit Py_UTF8Mode value. */ If set to -1 (default), inherit Py_UTF8Mode value. */
...@@ -325,6 +336,14 @@ PyAPI_FUNC(int) _PyCoreConfig_GetEnvDup( ...@@ -325,6 +336,14 @@ PyAPI_FUNC(int) _PyCoreConfig_GetEnvDup(
#endif #endif
#ifdef Py_BUILD_CORE
PyAPI_FUNC(int) _Py_SetFileSystemEncoding(
const char *encoding,
const char *errors);
PyAPI_FUNC(void) _Py_ClearFileSystemEncoding(void);
#endif
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
......
...@@ -175,7 +175,7 @@ PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size); ...@@ -175,7 +175,7 @@ PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size);
/* Legacy locale support */ /* Legacy locale support */
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
PyAPI_FUNC(void) _Py_CoerceLegacyLocale(const _PyCoreConfig *config); PyAPI_FUNC(void) _Py_CoerceLegacyLocale(int warn);
PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void); PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category); PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
#endif #endif
......
...@@ -251,6 +251,8 @@ class EmbeddingTests(EmbeddingTestsMixin, unittest.TestCase): ...@@ -251,6 +251,8 @@ class EmbeddingTests(EmbeddingTestsMixin, unittest.TestCase):
class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
maxDiff = 4096 maxDiff = 4096
UTF8_MODE_ERRORS = ('surrogatepass' if sys.platform == 'win32'
else 'surrogateescape')
DEFAULT_CONFIG = { DEFAULT_CONFIG = {
'install_signal_handlers': 1, 'install_signal_handlers': 1,
'use_environment': 1, 'use_environment': 1,
...@@ -265,8 +267,12 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): ...@@ -265,8 +267,12 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'show_alloc_count': 0, 'show_alloc_count': 0,
'dump_refs': 0, 'dump_refs': 0,
'malloc_stats': 0, 'malloc_stats': 0,
'utf8_mode': 0,
# None means that the default encoding is read at runtime:
# see get_locale_encoding().
'filesystem_encoding': None,
'filesystem_errors': sys.getfilesystemencodeerrors(),
'utf8_mode': 0,
'coerce_c_locale': 0, 'coerce_c_locale': 0,
'coerce_c_locale_warn': 0, 'coerce_c_locale_warn': 0,
...@@ -297,6 +303,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): ...@@ -297,6 +303,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'_frozen': 0, '_frozen': 0,
} }
def get_stdio_encoding(self, env): def get_stdio_encoding(self, env):
code = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)' code = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)'
args = (sys.executable, '-c', code) args = (sys.executable, '-c', code)
...@@ -308,6 +315,29 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): ...@@ -308,6 +315,29 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
out = proc.stdout.rstrip() out = proc.stdout.rstrip()
return out.split() return out.split()
def get_locale_encoding(self, isolated):
    """Return the codec name expected for the filesystem encoding.

    On Windows, macOS and Android the answer is always "utf-8".  Elsewhere
    a child interpreter is started to read the LC_CTYPE locale codeset and
    print its normalized Python codec name.  Unless *isolated* is true,
    the child runs with PYTHONCOERCECLOCALE and PYTHONUTF8 set to '0'.

    Raises Exception if the child interpreter exits with a non-zero status.
    """
    if sys.platform in ('win32', 'darwin') or support.is_android:
        # Windows, macOS and Android use UTF-8
        return "utf-8"

    statements = [
        'import codecs, locale, sys',
        'locale.setlocale(locale.LC_CTYPE, "")',
        'enc = locale.nl_langinfo(locale.CODESET)',
        'enc = codecs.lookup(enc).name',
        'print(enc)',
    ]
    command = (sys.executable, '-c', '; '.join(statements))

    child_env = dict(os.environ)
    if not isolated:
        child_env.update(PYTHONCOERCECLOCALE='0', PYTHONUTF8='0')

    proc = subprocess.run(command,
                          text=True,
                          env=child_env,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    if proc.returncode != 0:
        raise Exception(f"failed to get the locale encoding: "
                        f"stdout={proc.stdout!r} stderr={proc.stderr!r}")
    return proc.stdout.rstrip()
def check_config(self, testname, expected): def check_config(self, testname, expected):
expected = dict(self.DEFAULT_CONFIG, **expected) expected = dict(self.DEFAULT_CONFIG, **expected)
...@@ -326,6 +356,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): ...@@ -326,6 +356,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
expected['stdio_encoding'] = res[0] expected['stdio_encoding'] = res[0]
if expected['stdio_errors'] is None: if expected['stdio_errors'] is None:
expected['stdio_errors'] = res[1] expected['stdio_errors'] = res[1]
if expected['filesystem_encoding'] is None:
expected['filesystem_encoding'] = self.get_locale_encoding(expected['isolated'])
for key, value in expected.items(): for key, value in expected.items():
expected[key] = str(value) expected[key] = str(value)
...@@ -357,7 +389,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): ...@@ -357,7 +389,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'utf8_mode': 1, 'utf8_mode': 1,
'stdio_encoding': 'utf-8', 'stdio_encoding': 'utf-8',
'stdio_errors': 'surrogateescape', 'stdio_errors': 'surrogateescape',
'filesystem_encoding': 'utf-8',
'filesystem_errors': self.UTF8_MODE_ERRORS,
'user_site_directory': 0, 'user_site_directory': 0,
'_frozen': 1, '_frozen': 1,
} }
...@@ -378,6 +411,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): ...@@ -378,6 +411,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'utf8_mode': 1, 'utf8_mode': 1,
'stdio_encoding': 'iso8859-1', 'stdio_encoding': 'iso8859-1',
'stdio_errors': 'replace', 'stdio_errors': 'replace',
'filesystem_encoding': 'utf-8',
'filesystem_errors': self.UTF8_MODE_ERRORS,
'pycache_prefix': 'conf_pycache_prefix', 'pycache_prefix': 'conf_pycache_prefix',
'program_name': './conf_program_name', 'program_name': './conf_program_name',
...@@ -409,6 +444,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): ...@@ -409,6 +444,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'import_time': 1, 'import_time': 1,
'malloc_stats': 1, 'malloc_stats': 1,
'utf8_mode': 1, 'utf8_mode': 1,
'filesystem_encoding': 'utf-8',
'filesystem_errors': self.UTF8_MODE_ERRORS,
'inspect': 1, 'inspect': 1,
'optimization_level': 2, 'optimization_level': 2,
'pycache_prefix': 'env_pycache_prefix', 'pycache_prefix': 'env_pycache_prefix',
......
...@@ -861,6 +861,16 @@ class SysModuleTest(unittest.TestCase): ...@@ -861,6 +861,16 @@ class SysModuleTest(unittest.TestCase):
def test_no_duplicates_in_meta_path(self): def test_no_duplicates_in_meta_path(self):
self.assertEqual(len(sys.meta_path), len(set(sys.meta_path))) self.assertEqual(len(sys.meta_path), len(set(sys.meta_path)))
@unittest.skipUnless(hasattr(sys, "_enablelegacywindowsfsencoding"),
                     'needs sys._enablelegacywindowsfsencoding()')
def test__enablelegacywindowsfsencoding(self):
    # Run a child interpreter that switches to the legacy Windows
    # filesystem encoding, then prints the encoding and error handler
    # reported by the sys module; both must reflect the switch.
    script = '; '.join([
        'import sys',
        'sys._enablelegacywindowsfsencoding()',
        'print(sys.getfilesystemencoding(), sys.getfilesystemencodeerrors())',
    ])
    rc, out, err = assert_python_ok('-c', script)
    reported = out.decode('ascii', 'replace').rstrip()
    self.assertEqual(reported, 'mbcs replace')
@test.support.cpython_only @test.support.cpython_only
class SizeofTest(unittest.TestCase): class SizeofTest(unittest.TestCase):
......
The Python filesystem encoding is now read earlier during the Python
initialization.
...@@ -1339,7 +1339,7 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config, ...@@ -1339,7 +1339,7 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config,
*/ */
if (config->coerce_c_locale && !locale_coerced) { if (config->coerce_c_locale && !locale_coerced) {
locale_coerced = 1; locale_coerced = 1;
_Py_CoerceLegacyLocale(config); _Py_CoerceLegacyLocale(config->coerce_c_locale_warn);
encoding_changed = 1; encoding_changed = 1;
} }
......
...@@ -3410,27 +3410,24 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors) ...@@ -3410,27 +3410,24 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
PyObject * PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode) PyUnicode_EncodeFSDefault(PyObject *unicode)
{ {
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config;
#if defined(__APPLE__) #if defined(__APPLE__)
return _PyUnicode_AsUTF8String(unicode, Py_FileSystemDefaultEncodeErrors); return _PyUnicode_AsUTF8String(unicode, config->filesystem_errors);
#else #else
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
/* Bootstrap check: if the filesystem codec is implemented in Python, we /* Bootstrap check: if the filesystem codec is implemented in Python, we
cannot use it to encode and decode filenames before it is loaded. Load cannot use it to encode and decode filenames before it is loaded. Load
the Python codec requires to encode at least its own filename. Use the C the Python codec requires to encode at least its own filename. Use the C
version of the locale codec until the codec registry is initialized and implementation of the locale codec until the codec registry is
the Python codec is loaded. initialized and the Python codec is loaded. See initfsencoding(). */
if (interp->fscodec_initialized) {
Py_FileSystemDefaultEncoding is shared between all interpreters, we
cannot only rely on it: check also interp->fscodec_initialized for
subinterpreters. */
if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) {
return PyUnicode_AsEncodedString(unicode, return PyUnicode_AsEncodedString(unicode,
Py_FileSystemDefaultEncoding, config->filesystem_encoding,
Py_FileSystemDefaultEncodeErrors); config->filesystem_errors);
} }
else { else {
return unicode_encode_locale(unicode, return unicode_encode_locale(unicode,
Py_FileSystemDefaultEncodeErrors, 0); config->filesystem_errors, 0);
} }
#endif #endif
} }
...@@ -3636,27 +3633,24 @@ PyUnicode_DecodeFSDefault(const char *s) { ...@@ -3636,27 +3633,24 @@ PyUnicode_DecodeFSDefault(const char *s) {
PyObject* PyObject*
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
{ {
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config;
#if defined(__APPLE__) #if defined(__APPLE__)
return PyUnicode_DecodeUTF8Stateful(s, size, Py_FileSystemDefaultEncodeErrors, NULL); return PyUnicode_DecodeUTF8Stateful(s, size, config->filesystem_errors, NULL);
#else #else
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
/* Bootstrap check: if the filesystem codec is implemented in Python, we /* Bootstrap check: if the filesystem codec is implemented in Python, we
cannot use it to encode and decode filenames before it is loaded. Load cannot use it to encode and decode filenames before it is loaded. Load
the Python codec requires to encode at least its own filename. Use the C the Python codec requires to encode at least its own filename. Use the C
version of the locale codec until the codec registry is initialized and implementation of the locale codec until the codec registry is
the Python codec is loaded. initialized and the Python codec is loaded. See initfsencoding(). */
if (interp->fscodec_initialized) {
Py_FileSystemDefaultEncoding is shared between all interpreters, we
cannot only rely on it: check also interp->fscodec_initialized for
subinterpreters. */
if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) {
return PyUnicode_Decode(s, size, return PyUnicode_Decode(s, size,
Py_FileSystemDefaultEncoding, config->filesystem_encoding,
Py_FileSystemDefaultEncodeErrors); config->filesystem_errors);
} }
else { else {
return unicode_decode_locale(s, size, return unicode_decode_locale(s, size,
Py_FileSystemDefaultEncodeErrors, 0); config->filesystem_errors, 0);
} }
#endif #endif
} }
......
...@@ -81,8 +81,15 @@ main(int argc, char *argv[]) ...@@ -81,8 +81,15 @@ main(int argc, char *argv[])
config.program_name = L"./_freeze_importlib"; config.program_name = L"./_freeze_importlib";
/* Don't install importlib, since it could execute outdated bytecode. */ /* Don't install importlib, since it could execute outdated bytecode. */
config._install_importlib = 0; config._install_importlib = 0;
config.install_signal_handlers = 1;
config._frozen = 1; config._frozen = 1;
#ifdef MS_WINDOWS
/* bpo-34523: initfsencoding() is not called if _install_importlib=0,
so interp->fscodec_initialized value remains 0.
PyUnicode_EncodeFSDefault() doesn't support the "surrogatepass" error
handler in such case, whereas it's the default error handler on Windows.
Force the "strict" error handler to work around this bootstrap issue. */
config.filesystem_errors = "strict";
#endif
_PyInitError err = _Py_InitializeFromConfig(&config); _PyInitError err = _Py_InitializeFromConfig(&config);
/* No need to call _PyCoreConfig_Clear() since we didn't allocate any /* No need to call _PyCoreConfig_Clear() since we didn't allocate any
......
...@@ -328,6 +328,8 @@ dump_config(void) ...@@ -328,6 +328,8 @@ dump_config(void)
printf("dump_refs = %i\n", config->dump_refs); printf("dump_refs = %i\n", config->dump_refs);
printf("malloc_stats = %i\n", config->malloc_stats); printf("malloc_stats = %i\n", config->malloc_stats);
printf("filesystem_encoding = %s\n", config->filesystem_encoding);
printf("filesystem_errors = %s\n", config->filesystem_errors);
printf("coerce_c_locale = %i\n", config->coerce_c_locale); printf("coerce_c_locale = %i\n", config->coerce_c_locale);
printf("coerce_c_locale_warn = %i\n", config->coerce_c_locale_warn); printf("coerce_c_locale_warn = %i\n", config->coerce_c_locale_warn);
printf("utf8_mode = %i\n", config->utf8_mode); printf("utf8_mode = %i\n", config->utf8_mode);
......
...@@ -5,6 +5,11 @@ ...@@ -5,6 +5,11 @@
# include <langinfo.h> # include <langinfo.h>
#endif #endif
#include <locale.h> /* setlocale() */
#ifdef HAVE_LANGINFO_H
#include <langinfo.h> /* nl_langinfo(CODESET) */
#endif
#define DECODE_LOCALE_ERR(NAME, LEN) \ #define DECODE_LOCALE_ERR(NAME, LEN) \
(((LEN) == -2) \ (((LEN) == -2) \
...@@ -32,6 +37,8 @@ const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */ ...@@ -32,6 +37,8 @@ const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
int Py_HasFileSystemDefaultEncoding = 0; int Py_HasFileSystemDefaultEncoding = 0;
#endif #endif
const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape"; const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape";
static int _Py_HasFileSystemDefaultEncodeErrors = 1;
/* UTF-8 mode (PEP 540): if equals to 1, use the UTF-8 encoding, and change /* UTF-8 mode (PEP 540): if equals to 1, use the UTF-8 encoding, and change
stdin and stdout error handler to "surrogateescape". It is equal to stdin and stdout error handler to "surrogateescape". It is equal to
-1 by default: unknown, will be set by Py_Main() */ -1 by default: unknown, will be set by Py_Main() */
...@@ -88,6 +95,47 @@ _Py_wstrlist_copy(int len, wchar_t **list) ...@@ -88,6 +95,47 @@ _Py_wstrlist_copy(int len, wchar_t **list)
} }
/* Release the Py_FileSystemDefaultEncoding and
   Py_FileSystemDefaultEncodeErrors global strings.

   A string is freed (and its global reset to NULL) only when it is set and
   its companion "Has...Default" flag is zero, which is the state left by
   _Py_SetFileSystemEncoding() after it installs heap-allocated copies. */
void
_Py_ClearFileSystemEncoding(void)
{
    if (Py_FileSystemDefaultEncoding != NULL
        && Py_HasFileSystemDefaultEncoding == 0)
    {
        PyMem_RawFree((char *)Py_FileSystemDefaultEncoding);
        Py_FileSystemDefaultEncoding = NULL;
    }

    if (Py_FileSystemDefaultEncodeErrors != NULL
        && _Py_HasFileSystemDefaultEncodeErrors == 0)
    {
        PyMem_RawFree((char *)Py_FileSystemDefaultEncodeErrors);
        Py_FileSystemDefaultEncodeErrors = NULL;
    }
}
/* Install private copies of `encoding` and `errors` as the
   Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
   global configuration variables.

   Both strings are duplicated before the previous values are released, so
   on allocation failure the globals are left untouched.

   Return 0 on success, -1 if a copy could not be allocated. */
int
_Py_SetFileSystemEncoding(const char *encoding, const char *errors)
{
    char *encoding_copy = _PyMem_RawStrdup(encoding);
    if (!encoding_copy) {
        return -1;
    }

    char *errors_copy = _PyMem_RawStrdup(errors);
    if (!errors_copy) {
        PyMem_RawFree(encoding_copy);
        return -1;
    }

    /* Drop the old values first: the clear function relies on the current
       flag values to decide what it is allowed to free. */
    _Py_ClearFileSystemEncoding();

    Py_FileSystemDefaultEncoding = encoding_copy;
    Py_FileSystemDefaultEncodeErrors = errors_copy;

    /* Zero flags mark both globals as heap strings that must be freed. */
    Py_HasFileSystemDefaultEncoding = 0;
    _Py_HasFileSystemDefaultEncodeErrors = 0;
    return 0;
}
/* Helper to allow an embedding application to override the normal /* Helper to allow an embedding application to override the normal
* mechanism that attempts to figure out an appropriate IO encoding * mechanism that attempts to figure out an appropriate IO encoding
*/ */
...@@ -209,6 +257,8 @@ _PyCoreConfig_Clear(_PyCoreConfig *config) ...@@ -209,6 +257,8 @@ _PyCoreConfig_Clear(_PyCoreConfig *config)
#endif #endif
CLEAR(config->base_exec_prefix); CLEAR(config->base_exec_prefix);
CLEAR(config->filesystem_encoding);
CLEAR(config->filesystem_errors);
CLEAR(config->stdio_encoding); CLEAR(config->stdio_encoding);
CLEAR(config->stdio_errors); CLEAR(config->stdio_errors);
#undef CLEAR #undef CLEAR
...@@ -302,6 +352,8 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) ...@@ -302,6 +352,8 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
COPY_ATTR(quiet); COPY_ATTR(quiet);
COPY_ATTR(user_site_directory); COPY_ATTR(user_site_directory);
COPY_ATTR(buffered_stdio); COPY_ATTR(buffered_stdio);
COPY_STR_ATTR(filesystem_encoding);
COPY_STR_ATTR(filesystem_errors);
COPY_STR_ATTR(stdio_encoding); COPY_STR_ATTR(stdio_encoding);
COPY_STR_ATTR(stdio_errors); COPY_STR_ATTR(stdio_errors);
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
...@@ -312,6 +364,7 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) ...@@ -312,6 +364,7 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
COPY_ATTR(_frozen); COPY_ATTR(_frozen);
#undef COPY_ATTR #undef COPY_ATTR
#undef COPY_STR_ATTR
#undef COPY_WSTR_ATTR #undef COPY_WSTR_ATTR
#undef COPY_WSTRLIST #undef COPY_WSTRLIST
return 0; return 0;
...@@ -976,8 +1029,8 @@ get_stdio_errors(const _PyCoreConfig *config) ...@@ -976,8 +1029,8 @@ get_stdio_errors(const _PyCoreConfig *config)
} }
_PyInitError static _PyInitError
_Py_get_locale_encoding(char **locale_encoding) get_locale_encoding(char **locale_encoding)
{ {
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
char encoding[20]; char encoding[20];
...@@ -1087,7 +1140,7 @@ config_init_stdio_encoding(_PyCoreConfig *config) ...@@ -1087,7 +1140,7 @@ config_init_stdio_encoding(_PyCoreConfig *config)
/* Choose the default error handler based on the current locale. */ /* Choose the default error handler based on the current locale. */
if (config->stdio_encoding == NULL) { if (config->stdio_encoding == NULL) {
_PyInitError err = _Py_get_locale_encoding(&config->stdio_encoding); _PyInitError err = get_locale_encoding(&config->stdio_encoding);
if (_Py_INIT_FAILED(err)) { if (_Py_INIT_FAILED(err)) {
return err; return err;
} }
...@@ -1104,6 +1157,81 @@ config_init_stdio_encoding(_PyCoreConfig *config) ...@@ -1104,6 +1157,81 @@ config_init_stdio_encoding(_PyCoreConfig *config)
} }
/* Store a heap copy of `value` in `*field` unless the field is already set.
   Return 0 on success (or when nothing had to be done), -1 if the copy
   could not be allocated. */
static int
config_fs_set_default(char **field, const char *value)
{
    if (*field != NULL) {
        return 0;
    }
    *field = _PyMem_RawStrdup(value);
    return (*field != NULL) ? 0 : -1;
}

/* Fill in config->filesystem_encoding and config->filesystem_errors when
   they are still NULL; values that are already set are never overwritten.

   Windows: mbcs/replace when legacy_windows_fs_encoding is set, otherwise
   utf-8/surrogatepass (PEP 529).
   Other platforms: utf-8 in UTF-8 mode; otherwise UTF-8 on macOS and
   Android and the locale encoding elsewhere.  The error handler defaults
   to "surrogateescape".

   Return _Py_INIT_NO_MEMORY() on allocation failure, the error reported by
   get_locale_encoding() if it fails, and _Py_INIT_OK() otherwise. */
static _PyInitError
config_init_fs_encoding(_PyCoreConfig *config)
{
#ifdef MS_WINDOWS
    if (config->legacy_windows_fs_encoding) {
        /* Legacy Windows filesystem encoding: mbcs/replace */
        if (config_fs_set_default(&config->filesystem_encoding, "mbcs") < 0
            || config_fs_set_default(&config->filesystem_errors, "replace") < 0)
        {
            return _Py_INIT_NO_MEMORY();
        }
    }

    /* Windows defaults to utf-8/surrogatepass (PEP 529) */
    if (config_fs_set_default(&config->filesystem_encoding, "utf-8") < 0
        || config_fs_set_default(&config->filesystem_errors, "surrogatepass") < 0)
    {
        return _Py_INIT_NO_MEMORY();
    }
#else
    if (config->utf8_mode) {
        /* UTF-8 Mode uses utf-8; the error handler falls through to the
           "surrogateescape" default set at the end of this branch. */
        if (config_fs_set_default(&config->filesystem_encoding, "utf-8") < 0) {
            return _Py_INIT_NO_MEMORY();
        }
    }

    if (config->filesystem_encoding == NULL) {
        /* macOS and Android use UTF-8, other platforms use
           the locale encoding. */
#if defined(__APPLE__) || defined(__ANDROID__)
        if (config_fs_set_default(&config->filesystem_encoding, "UTF-8") < 0) {
            return _Py_INIT_NO_MEMORY();
        }
#else
        /* get_locale_encoding() hands back a heap string that the caller
           owns.  Store it directly instead of duplicating it: copying it
           and dropping the original pointer would leak the original. */
        char *locale_encoding = NULL;
        _PyInitError err = get_locale_encoding(&locale_encoding);
        if (_Py_INIT_FAILED(err)) {
            return err;
        }
        config->filesystem_encoding = locale_encoding;
#endif
    }

    /* by default, use the "surrogateescape" error handler */
    if (config_fs_set_default(&config->filesystem_errors, "surrogateescape") < 0) {
        return _Py_INIT_NO_MEMORY();
    }
#endif
    return _Py_INIT_OK();
}
/* Read configuration settings from standard locations /* Read configuration settings from standard locations
* *
* This function doesn't make any changes to the interpreter state - it * This function doesn't make any changes to the interpreter state - it
...@@ -1216,6 +1344,13 @@ _PyCoreConfig_Read(_PyCoreConfig *config) ...@@ -1216,6 +1344,13 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
config->argc = 0; config->argc = 0;
} }
if (config->filesystem_encoding == NULL && config->filesystem_errors == NULL) {
err = config_init_fs_encoding(config);
if (_Py_INIT_FAILED(err)) {
return err;
}
}
err = config_init_stdio_encoding(config); err = config_init_stdio_encoding(config);
if (_Py_INIT_FAILED(err)) { if (_Py_INIT_FAILED(err)) {
return err; return err;
...@@ -1223,6 +1358,10 @@ _PyCoreConfig_Read(_PyCoreConfig *config) ...@@ -1223,6 +1358,10 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
assert(config->coerce_c_locale >= 0); assert(config->coerce_c_locale >= 0);
assert(config->use_environment >= 0); assert(config->use_environment >= 0);
assert(config->filesystem_encoding != NULL);
assert(config->filesystem_errors != NULL);
assert(config->stdio_encoding != NULL);
assert(config->stdio_errors != NULL);
return _Py_INIT_OK(); return _Py_INIT_OK();
} }
...@@ -339,7 +339,7 @@ static const char C_LOCALE_COERCION_WARNING[] = ...@@ -339,7 +339,7 @@ static const char C_LOCALE_COERCION_WARNING[] =
"or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n"; "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
static void static void
_coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoercionTarget *target) _coerce_default_locale_settings(int warn, const _LocaleCoercionTarget *target)
{ {
const char *newloc = target->locale_name; const char *newloc = target->locale_name;
...@@ -352,7 +352,7 @@ _coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoerci ...@@ -352,7 +352,7 @@ _coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoerci
"Error setting LC_CTYPE, skipping C locale coercion\n"); "Error setting LC_CTYPE, skipping C locale coercion\n");
return; return;
} }
if (config->coerce_c_locale_warn) { if (warn) {
fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc); fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc);
} }
...@@ -362,7 +362,7 @@ _coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoerci ...@@ -362,7 +362,7 @@ _coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoerci
#endif #endif
void void
_Py_CoerceLegacyLocale(const _PyCoreConfig *config) _Py_CoerceLegacyLocale(int warn)
{ {
#ifdef PY_COERCE_C_LOCALE #ifdef PY_COERCE_C_LOCALE
const char *locale_override = getenv("LC_ALL"); const char *locale_override = getenv("LC_ALL");
...@@ -385,7 +385,7 @@ defined(HAVE_LANGINFO_H) && defined(CODESET) ...@@ -385,7 +385,7 @@ defined(HAVE_LANGINFO_H) && defined(CODESET)
} }
#endif #endif
/* Successfully configured locale, so make it the default */ /* Successfully configured locale, so make it the default */
_coerce_default_locale_settings(config, target); _coerce_default_locale_settings(warn, target);
return; return;
} }
} }
...@@ -1162,11 +1162,7 @@ Py_FinalizeEx(void) ...@@ -1162,11 +1162,7 @@ Py_FinalizeEx(void)
/* Cleanup Unicode implementation */ /* Cleanup Unicode implementation */
_PyUnicode_Fini(); _PyUnicode_Fini();
/* reset file system default encoding */ _Py_ClearFileSystemEncoding();
if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) {
PyMem_RawFree((char*)Py_FileSystemDefaultEncoding);
Py_FileSystemDefaultEncoding = NULL;
}
/* XXX Still allocated: /* XXX Still allocated:
- various static ad-hoc pointers to interned strings - various static ad-hoc pointers to interned strings
...@@ -1475,59 +1471,31 @@ add_main_module(PyInterpreterState *interp) ...@@ -1475,59 +1471,31 @@ add_main_module(PyInterpreterState *interp)
static _PyInitError static _PyInitError
initfsencoding(PyInterpreterState *interp) initfsencoding(PyInterpreterState *interp)
{ {
PyObject *codec; _PyCoreConfig *config = &interp->core_config;
#ifdef MS_WINDOWS
if (Py_LegacyWindowsFSEncodingFlag) {
Py_FileSystemDefaultEncoding = "mbcs";
Py_FileSystemDefaultEncodeErrors = "replace";
}
else {
Py_FileSystemDefaultEncoding = "utf-8";
Py_FileSystemDefaultEncodeErrors = "surrogatepass";
}
#else
if (Py_FileSystemDefaultEncoding == NULL) {
if (interp->core_config.utf8_mode) {
Py_FileSystemDefaultEncoding = "utf-8";
Py_HasFileSystemDefaultEncoding = 1;
}
else if (_Py_GetForceASCII()) {
Py_FileSystemDefaultEncoding = "ascii";
Py_HasFileSystemDefaultEncoding = 1;
}
else {
extern _PyInitError _Py_get_locale_encoding(char **locale_encoding);
char *locale_encoding;
_PyInitError err = _Py_get_locale_encoding(&locale_encoding);
if (_Py_INIT_FAILED(err)) {
return err;
}
Py_FileSystemDefaultEncoding = get_codec_name(locale_encoding); char *encoding = get_codec_name(config->filesystem_encoding);
PyMem_RawFree(locale_encoding); if (encoding == NULL) {
if (Py_FileSystemDefaultEncoding == NULL) { /* Such error can only occurs in critical situations: no more
memory, import a module of the standard library failed, etc. */
return _Py_INIT_ERR("failed to get the Python codec " return _Py_INIT_ERR("failed to get the Python codec "
"of the locale encoding"); "of the filesystem encoding");
} }
Py_HasFileSystemDefaultEncoding = 0; /* Update the filesystem encoding to the normalized Python codec name.
interp->fscodec_initialized = 1; For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
return _Py_INIT_OK(); (Python codec name). */
} PyMem_RawFree(config->filesystem_encoding);
} config->filesystem_encoding = encoding;
#endif
/* the encoding is mbcs, utf-8 or ascii */ /* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding); global configuration variables. */
if (!codec) { if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
/* Such error can only occurs in critical situations: no more config->filesystem_errors) < 0) {
* memory, import a module of the standard library failed, return _Py_INIT_NO_MEMORY();
* etc. */
return _Py_INIT_ERR("unable to load the file system codec");
} }
Py_DECREF(codec);
/* PyUnicode can now use the Python codec rather than C implementation
for the filesystem encoding */
interp->fscodec_initialized = 1; interp->fscodec_initialized = 1;
return _Py_INIT_OK(); return _Py_INIT_OK();
} }
......
...@@ -389,11 +389,9 @@ implementation." ...@@ -389,11 +389,9 @@ implementation."
static PyObject * static PyObject *
sys_getfilesystemencoding(PyObject *self, PyObject *Py_UNUSED(ignored)) sys_getfilesystemencoding(PyObject *self, PyObject *Py_UNUSED(ignored))
{ {
if (Py_FileSystemDefaultEncoding) PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
return PyUnicode_FromString(Py_FileSystemDefaultEncoding); const _PyCoreConfig *config = &interp->core_config;
PyErr_SetString(PyExc_RuntimeError, return PyUnicode_FromString(config->filesystem_encoding);
"filesystem encoding is not initialized");
return NULL;
} }
PyDoc_STRVAR(getfilesystemencoding_doc, PyDoc_STRVAR(getfilesystemencoding_doc,
...@@ -406,11 +404,9 @@ operating system filenames." ...@@ -406,11 +404,9 @@ operating system filenames."
static PyObject * static PyObject *
sys_getfilesystemencodeerrors(PyObject *self, PyObject *Py_UNUSED(ignored)) sys_getfilesystemencodeerrors(PyObject *self, PyObject *Py_UNUSED(ignored))
{ {
if (Py_FileSystemDefaultEncodeErrors) PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
return PyUnicode_FromString(Py_FileSystemDefaultEncodeErrors); const _PyCoreConfig *config = &interp->core_config;
PyErr_SetString(PyExc_RuntimeError, return PyUnicode_FromString(config->filesystem_errors);
"filesystem encoding is not initialized");
return NULL;
} }
PyDoc_STRVAR(getfilesystemencodeerrors_doc, PyDoc_STRVAR(getfilesystemencodeerrors_doc,
...@@ -1150,8 +1146,30 @@ environment variable before launching Python." ...@@ -1150,8 +1146,30 @@ environment variable before launching Python."
static PyObject * static PyObject *
sys_enablelegacywindowsfsencoding(PyObject *self) sys_enablelegacywindowsfsencoding(PyObject *self)
{ {
Py_FileSystemDefaultEncoding = "mbcs"; PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
Py_FileSystemDefaultEncodeErrors = "replace"; _PyCoreConfig *config = &interp->core_config;
/* Set the filesystem encoding to mbcs/replace (PEP 529) */
char *encoding = _PyMem_RawStrdup("mbcs");
char *errors = _PyMem_RawStrdup("replace");
if (encoding == NULL || errors == NULL) {
    /* Both strings come from _PyMem_RawStrdup(), i.e. the raw allocator,
       so they must be released with PyMem_RawFree() (as done for the
       config fields below) rather than PyMem_Free(). One of the two
       pointers may be NULL here. */
    PyMem_RawFree(encoding);
    PyMem_RawFree(errors);
    PyErr_NoMemory();
    return NULL;
}

/* Replace the strings held by the interpreter configuration; the config
   takes ownership of the new copies. */
PyMem_RawFree(config->filesystem_encoding);
config->filesystem_encoding = encoding;
PyMem_RawFree(config->filesystem_errors);
config->filesystem_errors = errors;

/* Mirror the new values into the Py_FileSystemDefaultEncoding and
   Py_FileSystemDefaultEncodeErrors global variables. */
if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
                              config->filesystem_errors) < 0) {
    PyErr_NoMemory();
    return NULL;
}
Py_RETURN_NONE; Py_RETURN_NONE;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment