Commit eb81795d authored by Nick Coghlan, committed by GitHub

bpo-30565: Add PYTHONCOERCECLOCALE=warn runtime flag (GH-2260)

- removes PY_WARN_ON_C_LOCALE build time flag
- locale coercion and compatibility warnings are now always compiled
  in, but are off by default
- adds PYTHONCOERCECLOCALE=warn runtime option to aid in
  debugging potentially locale related compatibility problems

Due to not-yet-resolved test failures on *BSD systems (including
Mac OS X), this also temporarily disables UTF-8 as a locale coercion
target, and skips testing the interpreter's behavior in the POSIX locale.
parent 6a98a04e
...@@ -744,6 +744,11 @@ conflict. ...@@ -744,6 +744,11 @@ conflict.
:data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This :data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This
behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual. behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual.
For debugging purposes, setting ``PYTHONCOERCECLOCALE=warn`` will cause
Python to emit warning messages on ``stderr`` if either the locale coercion
activates, or else if a locale that *would* have triggered coercion is
still active when the Python runtime is initialized.
Availability: \*nix Availability: \*nix
.. versionadded:: 3.7 .. versionadded:: 3.7
......
...@@ -96,20 +96,11 @@ defined coercion target locales (currently ``C.UTF-8``, ``C.utf8``, and ...@@ -96,20 +96,11 @@ defined coercion target locales (currently ``C.UTF-8``, ``C.utf8``, and
``UTF-8``). The default error handler for ``stderr`` continues to be ``UTF-8``). The default error handler for ``stderr`` continues to be
``backslashreplace``, regardless of locale. ``backslashreplace``, regardless of locale.
.. note::
In the current implementation, a warning message is printed directly to
``stderr`` even for successful implicit locale coercion. This gives
redistributors and system integrators the opportunity to determine if they
should be making an environmental change to avoid the need for implicit
coercion at the Python interpreter level.
However, it's not clear that this is going to be the best approach for
the final 3.7.0 release, and we may end up deciding to disable the warning
by default and provide some way of opting into it at runtime or build time.
Concrete examples of use cases where it would be preferable to disable the
warning by default can be noted on :issue:`30565`.
Locale coercion is silent by default, but to assist in debugging potentially
locale related integration problems, explicit warnings (emitted directly on
``stderr``) can be requested by setting ``PYTHONCOERCECLOCALE=warn``. This
setting will also cause the Python runtime to emit a warning if the legacy C
locale remains active when the core interpreter is initialized.
.. seealso:: .. seealso::
......
This diff is collapsed.
...@@ -105,10 +105,10 @@ static const char usage_6[] = ...@@ -105,10 +105,10 @@ static const char usage_6[] =
" predictable seed.\n" " predictable seed.\n"
"PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n" "PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n"
" on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n" " on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n"
" hooks.\n"; " hooks.\n"
static const char usage_7[] =
"PYTHONCOERCECLOCALE: if this variable is set to 0, it disables the locale\n" "PYTHONCOERCECLOCALE: if this variable is set to 0, it disables the locale\n"
" coercion behavior\n"; " coercion behavior. Use PYTHONCOERCECLOCALE=warn to request display of\n"
" locale coercion and locale compatibility warnings on stderr.\n";
static int static int
usage(int exitcode, const wchar_t* program) usage(int exitcode, const wchar_t* program)
...@@ -125,7 +125,6 @@ usage(int exitcode, const wchar_t* program) ...@@ -125,7 +125,6 @@ usage(int exitcode, const wchar_t* program)
fprintf(f, usage_4, (wint_t)DELIM); fprintf(f, usage_4, (wint_t)DELIM);
fprintf(f, usage_5, (wint_t)DELIM, PYTHONHOMEHELP); fprintf(f, usage_5, (wint_t)DELIM, PYTHONHOMEHELP);
fputs(usage_6, f); fputs(usage_6, f);
fputs(usage_7, f);
} }
return exitcode; return exitcode;
} }
......
...@@ -356,6 +356,10 @@ _Py_LegacyLocaleDetected(void) ...@@ -356,6 +356,10 @@ _Py_LegacyLocaleDetected(void)
{ {
#ifndef MS_WINDOWS #ifndef MS_WINDOWS
/* On non-Windows systems, the C locale is considered a legacy locale */ /* On non-Windows systems, the C locale is considered a legacy locale */
/* XXX (ncoghlan): some platforms (notably Mac OS X) don't appear to treat
* the POSIX locale as a simple alias for the C locale, so
* we may also want to check for that explicitly.
*/
const char *ctype_loc = setlocale(LC_CTYPE, NULL); const char *ctype_loc = setlocale(LC_CTYPE, NULL);
return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0; return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0;
#else #else
...@@ -364,6 +368,30 @@ _Py_LegacyLocaleDetected(void) ...@@ -364,6 +368,30 @@ _Py_LegacyLocaleDetected(void)
#endif #endif
} }
static const char *_C_LOCALE_WARNING =
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
"locales is recommended.\n";
static int
_legacy_locale_warnings_enabled(void)
{
const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
return (coerce_c_locale != NULL &&
strncmp(coerce_c_locale, "warn", 5) == 0);
}
static void
_emit_stderr_warning_for_legacy_locale(void)
{
if (_legacy_locale_warnings_enabled()) {
if (_Py_LegacyLocaleDetected()) {
fprintf(stderr, "%s", _C_LOCALE_WARNING);
}
}
}
typedef struct _CandidateLocale { typedef struct _CandidateLocale {
const char *locale_name; /* The locale to try as a coercion target */ const char *locale_name; /* The locale to try as a coercion target */
} _LocaleCoercionTarget; } _LocaleCoercionTarget;
...@@ -371,10 +399,17 @@ typedef struct _CandidateLocale { ...@@ -371,10 +399,17 @@ typedef struct _CandidateLocale {
static _LocaleCoercionTarget _TARGET_LOCALES[] = { static _LocaleCoercionTarget _TARGET_LOCALES[] = {
{"C.UTF-8"}, {"C.UTF-8"},
{"C.utf8"}, {"C.utf8"},
{"UTF-8"}, /* {"UTF-8"}, */
{NULL} {NULL}
}; };
/* XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
* problems encountered on *BSD systems with those test cases
* For additional details see:
* nl_langinfo CODESET error: https://bugs.python.org/issue30647
* locale handling differences: https://bugs.python.org/issue30672
*/
static char * static char *
get_default_standard_stream_error_handler(void) get_default_standard_stream_error_handler(void)
{ {
...@@ -419,7 +454,9 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target) ...@@ -419,7 +454,9 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
"Error setting LC_CTYPE, skipping C locale coercion\n"); "Error setting LC_CTYPE, skipping C locale coercion\n");
return; return;
} }
fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc); if (_legacy_locale_warnings_enabled()) {
fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
}
/* Reconfigure with the overridden environment variables */ /* Reconfigure with the overridden environment variables */
setlocale(LC_ALL, ""); setlocale(LC_ALL, "");
...@@ -465,26 +502,6 @@ _Py_CoerceLegacyLocale(void) ...@@ -465,26 +502,6 @@ _Py_CoerceLegacyLocale(void)
} }
#ifdef PY_WARN_ON_C_LOCALE
static const char *_C_LOCALE_WARNING =
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
"locales is recommended.\n";
static void
_emit_stderr_warning_for_c_locale(void)
{
const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
if (_Py_LegacyLocaleDetected()) {
fprintf(stderr, "%s", _C_LOCALE_WARNING);
}
}
}
#endif
/* Global initializations. Can be undone by Py_Finalize(). Don't /* Global initializations. Can be undone by Py_Finalize(). Don't
call this twice without an intervening Py_Finalize() call. call this twice without an intervening Py_Finalize() call.
...@@ -561,9 +578,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config) ...@@ -561,9 +578,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
the locale's charset without having to switch the locale's charset without having to switch
locales. */ locales. */
setlocale(LC_CTYPE, ""); setlocale(LC_CTYPE, "");
#ifdef PY_WARN_ON_C_LOCALE _emit_stderr_warning_for_legacy_locale();
_emit_stderr_warning_for_c_locale();
#endif
#endif #endif
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment