Commit 9e4994d4 authored by Victor Stinner, committed by GitHub

bpo-34485: Enhance init_sys_streams() (GH-8978)

Python now gets the locale encoding with C code to initialize the encoding
of standard streams like sys.stdout. Moreover, the encoding is now
initialized to the Python codec name to get a normalized encoding name and
to ensure that the codec is loaded. The change avoids importing
_bootlocale and _locale modules at startup by default.

When the PYTHONIOENCODING environment variable only contains an encoding,
the error handler is now set explicitly to "strict".

Also rename get_default_standard_stream_error_handler() to
get_stdio_errors().

Reduce the buffer to format the "cpXXX" string (Windows locale encoding).
parent d500e530
...@@ -171,17 +171,17 @@ class EmbeddingTests(EmbeddingTestsMixin, unittest.TestCase): ...@@ -171,17 +171,17 @@ class EmbeddingTests(EmbeddingTestsMixin, unittest.TestCase):
"stdout: {out_encoding}:ignore", "stdout: {out_encoding}:ignore",
"stderr: {out_encoding}:backslashreplace", "stderr: {out_encoding}:backslashreplace",
"--- Set encoding only ---", "--- Set encoding only ---",
"Expected encoding: latin-1", "Expected encoding: iso8859-1",
"Expected errors: default", "Expected errors: default",
"stdin: latin-1:{errors}", "stdin: iso8859-1:{errors}",
"stdout: latin-1:{errors}", "stdout: iso8859-1:{errors}",
"stderr: latin-1:backslashreplace", "stderr: iso8859-1:backslashreplace",
"--- Set encoding and errors ---", "--- Set encoding and errors ---",
"Expected encoding: latin-1", "Expected encoding: iso8859-1",
"Expected errors: replace", "Expected errors: replace",
"stdin: latin-1:replace", "stdin: iso8859-1:replace",
"stdout: latin-1:replace", "stdout: iso8859-1:replace",
"stderr: latin-1:backslashreplace"]) "stderr: iso8859-1:backslashreplace"])
expected_output = expected_output.format( expected_output = expected_output.format(
in_encoding=expected_stream_encoding, in_encoding=expected_stream_encoding,
out_encoding=expected_stream_encoding, out_encoding=expected_stream_encoding,
......
...@@ -668,7 +668,7 @@ class SysModuleTest(unittest.TestCase): ...@@ -668,7 +668,7 @@ class SysModuleTest(unittest.TestCase):
'dump("stdout")', 'dump("stdout")',
'dump("stderr")', 'dump("stderr")',
)) ))
args = [sys.executable, "-c", code] args = [sys.executable, "-X", "utf8=0", "-c", code]
if isolated: if isolated:
args.append("-I") args.append("-I")
if encoding is not None: if encoding is not None:
...@@ -712,8 +712,8 @@ class SysModuleTest(unittest.TestCase): ...@@ -712,8 +712,8 @@ class SysModuleTest(unittest.TestCase):
# have no any effect # have no any effect
out = self.c_locale_get_error_handler(encoding=':') out = self.c_locale_get_error_handler(encoding=':')
self.assertEqual(out, self.assertEqual(out,
'stdin: strict\n' 'stdin: surrogateescape\n'
'stdout: strict\n' 'stdout: surrogateescape\n'
'stderr: backslashreplace\n') 'stderr: backslashreplace\n')
out = self.c_locale_get_error_handler(encoding='') out = self.c_locale_get_error_handler(encoding='')
self.assertEqual(out, self.assertEqual(out,
......
...@@ -139,16 +139,16 @@ class UTF8ModeTests(unittest.TestCase): ...@@ -139,16 +139,16 @@ class UTF8ModeTests(unittest.TestCase):
out = self.get_output('-X', 'utf8', '-c', code, out = self.get_output('-X', 'utf8', '-c', code,
PYTHONIOENCODING="latin1") PYTHONIOENCODING="latin1")
self.assertEqual(out.splitlines(), self.assertEqual(out.splitlines(),
['stdin: latin1/strict', ['stdin: iso8859-1/strict',
'stdout: latin1/strict', 'stdout: iso8859-1/strict',
'stderr: latin1/backslashreplace']) 'stderr: iso8859-1/backslashreplace'])
out = self.get_output('-X', 'utf8', '-c', code, out = self.get_output('-X', 'utf8', '-c', code,
PYTHONIOENCODING=":namereplace") PYTHONIOENCODING=":namereplace")
self.assertEqual(out.splitlines(), self.assertEqual(out.splitlines(),
['stdin: UTF-8/namereplace', ['stdin: utf-8/namereplace',
'stdout: UTF-8/namereplace', 'stdout: utf-8/namereplace',
'stderr: UTF-8/backslashreplace']) 'stderr: utf-8/backslashreplace'])
def test_io(self): def test_io(self):
code = textwrap.dedent(''' code = textwrap.dedent('''
......
Python now gets the locale encoding with C code to initialize the encoding
of standard streams like sys.stdout. Moreover, the encoding is now
initialized to the Python codec name to get a normalized encoding name and
to ensure that the codec is loaded. The change avoids importing _bootlocale
and _locale modules at startup by default.
Fix the error handler of standard streams like sys.stdout:
PYTHONIOENCODING=":" is now ignored instead of setting the error handler to
"strict".
...@@ -319,7 +319,7 @@ exit: ...@@ -319,7 +319,7 @@ exit:
static PyObject* static PyObject*
PyLocale_getdefaultlocale(PyObject* self, PyObject *Py_UNUSED(ignored)) PyLocale_getdefaultlocale(PyObject* self, PyObject *Py_UNUSED(ignored))
{ {
char encoding[100]; char encoding[20];
char locale[100]; char locale[100];
PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP()); PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
......
...@@ -113,9 +113,9 @@ static int test_forced_io_encoding(void) ...@@ -113,9 +113,9 @@ static int test_forced_io_encoding(void)
printf("--- Set errors only ---\n"); printf("--- Set errors only ---\n");
check_stdio_details(NULL, "ignore"); check_stdio_details(NULL, "ignore");
printf("--- Set encoding only ---\n"); printf("--- Set encoding only ---\n");
check_stdio_details("latin-1", NULL); check_stdio_details("iso8859-1", NULL);
printf("--- Set encoding and errors ---\n"); printf("--- Set encoding and errors ---\n");
check_stdio_details("latin-1", "replace"); check_stdio_details("iso8859-1", "replace");
/* Check calling after initialization fails */ /* Check calling after initialization fails */
Py_Initialize(); Py_Initialize();
......
...@@ -244,22 +244,26 @@ error: ...@@ -244,22 +244,26 @@ error:
return NULL; return NULL;
} }
static char* static _PyInitError
get_locale_encoding(void) get_locale_encoding(char **locale_encoding)
{ {
#if defined(HAVE_LANGINFO_H) && defined(CODESET) #ifdef MS_WINDOWS
char* codeset = nl_langinfo(CODESET); char encoding[20];
if (!codeset || codeset[0] == '\0') { PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
return NULL;
}
return get_codec_name(codeset);
#elif defined(__ANDROID__) #elif defined(__ANDROID__)
return get_codec_name("UTF-8"); const char *encoding = "UTF-8";
#else #else
PyErr_SetNone(PyExc_NotImplementedError); const char *encoding = nl_langinfo(CODESET);
return NULL; if (!encoding || encoding[0] == '\0') {
return _Py_INIT_USER_ERR("failed to get the locale encoding: "
"nl_langinfo(CODESET) failed");
}
#endif #endif
*locale_encoding = _PyMem_RawStrdup(encoding);
if (*locale_encoding == NULL) {
return _Py_INIT_NO_MEMORY();
}
return _Py_INIT_OK();
} }
static _PyInitError static _PyInitError
...@@ -397,7 +401,7 @@ static _LocaleCoercionTarget _TARGET_LOCALES[] = { ...@@ -397,7 +401,7 @@ static _LocaleCoercionTarget _TARGET_LOCALES[] = {
}; };
static const char * static const char *
get_default_standard_stream_error_handler(void) get_stdio_errors(void)
{ {
const char *ctype_loc = setlocale(LC_CTYPE, NULL); const char *ctype_loc = setlocale(LC_CTYPE, NULL);
if (ctype_loc != NULL) { if (ctype_loc != NULL) {
...@@ -417,8 +421,7 @@ get_default_standard_stream_error_handler(void) ...@@ -417,8 +421,7 @@ get_default_standard_stream_error_handler(void)
#endif #endif
} }
/* Otherwise return NULL to request the typical default error handler */ return "strict";
return NULL;
} }
#ifdef PY_COERCE_C_LOCALE #ifdef PY_COERCE_C_LOCALE
...@@ -1586,9 +1589,17 @@ initfsencoding(PyInterpreterState *interp) ...@@ -1586,9 +1589,17 @@ initfsencoding(PyInterpreterState *interp)
Py_HasFileSystemDefaultEncoding = 1; Py_HasFileSystemDefaultEncoding = 1;
} }
else { else {
Py_FileSystemDefaultEncoding = get_locale_encoding(); char *locale_encoding;
_PyInitError err = get_locale_encoding(&locale_encoding);
if (_Py_INIT_FAILED(err)) {
return err;
}
Py_FileSystemDefaultEncoding = get_codec_name(locale_encoding);
PyMem_RawFree(locale_encoding);
if (Py_FileSystemDefaultEncoding == NULL) { if (Py_FileSystemDefaultEncoding == NULL) {
return _Py_INIT_ERR("Unable to get the locale encoding"); return _Py_INIT_ERR("failed to get the Python codec "
"of the locale encoding");
} }
Py_HasFileSystemDefaultEncoding = 0; Py_HasFileSystemDefaultEncoding = 0;
...@@ -1787,6 +1798,8 @@ init_sys_streams(PyInterpreterState *interp) ...@@ -1787,6 +1798,8 @@ init_sys_streams(PyInterpreterState *interp)
PyObject * encoding_attr; PyObject * encoding_attr;
char *pythonioencoding = NULL; char *pythonioencoding = NULL;
const char *encoding, *errors; const char *encoding, *errors;
char *locale_encoding = NULL;
char *codec_name = NULL;
_PyInitError res = _Py_INIT_OK(); _PyInitError res = _Py_INIT_OK();
/* Hack to avoid a nasty recursion issue when Python is invoked /* Hack to avoid a nasty recursion issue when Python is invoked
...@@ -1838,21 +1851,46 @@ init_sys_streams(PyInterpreterState *interp) ...@@ -1838,21 +1851,46 @@ init_sys_streams(PyInterpreterState *interp)
errors = err; errors = err;
} }
} }
if (*pythonioencoding && !encoding) { if (!encoding && *pythonioencoding) {
encoding = pythonioencoding; encoding = pythonioencoding;
if (!errors) {
errors = "strict";
}
} }
} }
else if (interp->core_config.utf8_mode) {
encoding = "utf-8"; if (interp->core_config.utf8_mode) {
errors = "surrogateescape"; if (!encoding) {
encoding = "utf-8";
}
if (!errors) {
errors = "surrogateescape";
}
} }
if (!errors && !pythonioencoding) { if (!errors) {
/* Choose the default error handler based on the current locale */ /* Choose the default error handler based on the current locale */
errors = get_default_standard_stream_error_handler(); errors = get_stdio_errors();
} }
} }
if (encoding == NULL) {
_PyInitError err = get_locale_encoding(&locale_encoding);
if (_Py_INIT_FAILED(err)) {
return err;
}
encoding = locale_encoding;
}
codec_name = get_codec_name(encoding);
if (codec_name == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"failed to get the Python codec name "
"of stdio encoding");
goto error;
}
encoding = codec_name;
/* Set sys.stdin */ /* Set sys.stdin */
fd = fileno(stdin); fd = fileno(stdin);
/* Under some conditions stdin, stdout and stderr may not be connected /* Under some conditions stdin, stdout and stderr may not be connected
...@@ -1928,6 +1966,8 @@ done: ...@@ -1928,6 +1966,8 @@ done:
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
PyMem_RawFree(locale_encoding);
PyMem_RawFree(codec_name);
PyMem_Free(pythonioencoding); PyMem_Free(pythonioencoding);
Py_XDECREF(bimod); Py_XDECREF(bimod);
Py_XDECREF(iomod); Py_XDECREF(iomod);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment