Commit eb81795d authored by Nick Coghlan's avatar Nick Coghlan Committed by GitHub

bpo-30565: Add PYTHONCOERCECLOCALE=warn runtime flag (GH-2260)

- removes PY_WARN_ON_C_LOCALE build time flag
- locale coercion and compatibility warnings are now always compiled
  in, but are off by default
- adds PYTHONCOERCECLOCALE=warn runtime option to aid in
  debugging potentially locale related compatibility problems

Due to not-yet-resolved test failures on *BSD systems (including
Mac OS X), this also temporarily disables UTF-8 as a locale coercion
target, and skips testing the interpreter's behavior in the POSIX locale.
parent 6a98a04e
......@@ -744,6 +744,11 @@ conflict.
:data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This
behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual.
For debugging purposes, setting ``PYTHONCOERCECLOCALE=warn`` will cause
Python to emit warning messages on ``stderr`` if either the locale coercion
activates, or else if a locale that *would* have triggered coercion is
still active when the Python runtime is initialized.
Availability: \*nix
.. versionadded:: 3.7
......
......@@ -96,20 +96,11 @@ defined coercion target locales (currently ``C.UTF-8``, ``C.utf8``, and
``UTF-8``). The default error handler for ``stderr`` continues to be
``backslashreplace``, regardless of locale.
.. note::
In the current implementation, a warning message is printed directly to
``stderr`` even for successful implicit locale coercion. This gives
redistributors and system integrators the opportunity to determine if they
should be making an environmental change to avoid the need for implicit
coercion at the Python interpreter level.
However, it's not clear that this is going to be the best approach for
the final 3.7.0 release, and we may end up deciding to disable the warning
by default and provide some way of opting into it at runtime or build time.
Concrete examples of use cases where it would be preferrable to disable the
warning by default can be noted on :issue:`30565`.
Locale coercion is silent by default, but to assist in debugging potentially
locale related integration problems, explicit warnings (emitted directly on
``stderr`` can be requested by setting ``PYTHONCOERCECLOCALE=warn``. This
setting will also cause the Python runtime to emit a warning if the legacy C
locale remains active when the core interpreter is initialized.
.. seealso::
......
This diff is collapsed.
......@@ -105,10 +105,10 @@ static const char usage_6[] =
" predictable seed.\n"
"PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n"
" on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n"
" hooks.\n";
static const char usage_7[] =
" hooks.\n"
"PYTHONCOERCECLOCALE: if this variable is set to 0, it disables the locale\n"
" coercion behavior\n";
" coercion behavior. Use PYTHONCOERCECLOCALE=warn to request display of\n"
" locale coercion and locale compatibility warnings on stderr.\n";
static int
usage(int exitcode, const wchar_t* program)
......@@ -125,7 +125,6 @@ usage(int exitcode, const wchar_t* program)
fprintf(f, usage_4, (wint_t)DELIM);
fprintf(f, usage_5, (wint_t)DELIM, PYTHONHOMEHELP);
fputs(usage_6, f);
fputs(usage_7, f);
}
return exitcode;
}
......
......@@ -356,6 +356,10 @@ _Py_LegacyLocaleDetected(void)
{
#ifndef MS_WINDOWS
/* On non-Windows systems, the C locale is considered a legacy locale */
/* XXX (ncoghlan): some platforms (notably Mac OS X) don't appear to treat
* the POSIX locale as a simple alias for the C locale, so
* we may also want to check for that explicitly.
*/
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0;
#else
......@@ -364,6 +368,30 @@ _Py_LegacyLocaleDetected(void)
#endif
}
static const char *_C_LOCALE_WARNING =
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
"locales is recommended.\n";
static int
_legacy_locale_warnings_enabled(void)
{
const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
return (coerce_c_locale != NULL &&
strncmp(coerce_c_locale, "warn", 5) == 0);
}
static void
_emit_stderr_warning_for_legacy_locale(void)
{
if (_legacy_locale_warnings_enabled()) {
if (_Py_LegacyLocaleDetected()) {
fprintf(stderr, "%s", _C_LOCALE_WARNING);
}
}
}
typedef struct _CandidateLocale {
const char *locale_name; /* The locale to try as a coercion target */
} _LocaleCoercionTarget;
......@@ -371,10 +399,17 @@ typedef struct _CandidateLocale {
static _LocaleCoercionTarget _TARGET_LOCALES[] = {
{"C.UTF-8"},
{"C.utf8"},
{"UTF-8"},
/* {"UTF-8"}, */
{NULL}
};
/* XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
* problems encountered on *BSD systems with those test cases
* For additional details see:
* nl_langinfo CODESET error: https://bugs.python.org/issue30647
* locale handling differences: https://bugs.python.org/issue30672
*/
static char *
get_default_standard_stream_error_handler(void)
{
......@@ -419,7 +454,9 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
"Error setting LC_CTYPE, skipping C locale coercion\n");
return;
}
fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
if (_legacy_locale_warnings_enabled()) {
fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
}
/* Reconfigure with the overridden environment variables */
setlocale(LC_ALL, "");
......@@ -465,26 +502,6 @@ _Py_CoerceLegacyLocale(void)
}
#ifdef PY_WARN_ON_C_LOCALE
static const char *_C_LOCALE_WARNING =
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
"locales is recommended.\n";
static void
_emit_stderr_warning_for_c_locale(void)
{
const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
if (_Py_LegacyLocaleDetected()) {
fprintf(stderr, "%s", _C_LOCALE_WARNING);
}
}
}
#endif
/* Global initializations. Can be undone by Py_Finalize(). Don't
call this twice without an intervening Py_Finalize() call.
......@@ -561,9 +578,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
the locale's charset without having to switch
locales. */
setlocale(LC_CTYPE, "");
#ifdef PY_WARN_ON_C_LOCALE
_emit_stderr_warning_for_c_locale();
#endif
_emit_stderr_warning_for_legacy_locale();
#endif
#endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment