Commit 9dd76201 authored by Victor Stinner's avatar Victor Stinner Committed by GitHub

bpo-32030: Add _Py_EncodeLocaleRaw() (#4961)

Replace Py_EncodeLocale() with _Py_EncodeLocaleRaw() in:

* _Py_wfopen()
* _Py_wreadlink()
* _Py_wrealpath()
* _Py_wstat()
* pymain_open_filename()

These functions are called early during Python initialization, when only
the RAW memory allocator can be used.
parent 4a02543c
...@@ -13,10 +13,13 @@ PyAPI_FUNC(wchar_t *) Py_DecodeLocale( ...@@ -13,10 +13,13 @@ PyAPI_FUNC(wchar_t *) Py_DecodeLocale(
PyAPI_FUNC(char*) Py_EncodeLocale( PyAPI_FUNC(char*) Py_EncodeLocale(
const wchar_t *text, const wchar_t *text,
size_t *error_pos); size_t *error_pos);
PyAPI_FUNC(char*) _Py_EncodeLocaleRaw(
const wchar_t *text,
size_t *error_pos);
#endif #endif
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject *) _Py_device_encoding(int); PyAPI_FUNC(PyObject *) _Py_device_encoding(int);
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
......
...@@ -140,13 +140,13 @@ _Py_wstat(const wchar_t* path, struct stat *buf) ...@@ -140,13 +140,13 @@ _Py_wstat(const wchar_t* path, struct stat *buf)
{ {
int err; int err;
char *fname; char *fname;
fname = Py_EncodeLocale(path, NULL); fname = _Py_EncodeLocaleRaw(path, NULL);
if (fname == NULL) { if (fname == NULL) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
err = stat(fname, buf); err = stat(fname, buf);
PyMem_Free(fname); PyMem_RawFree(fname);
return err; return err;
} }
......
...@@ -1490,14 +1490,14 @@ pymain_open_filename(_PyMain *pymain) ...@@ -1490,14 +1490,14 @@ pymain_open_filename(_PyMain *pymain)
char *cfilename_buffer; char *cfilename_buffer;
const char *cfilename; const char *cfilename;
int err = errno; int err = errno;
cfilename_buffer = Py_EncodeLocale(pymain->filename, NULL); cfilename_buffer = _Py_EncodeLocaleRaw(pymain->filename, NULL);
if (cfilename_buffer != NULL) if (cfilename_buffer != NULL)
cfilename = cfilename_buffer; cfilename = cfilename_buffer;
else else
cfilename = "<unprintable file name>"; cfilename = "<unprintable file name>";
fprintf(stderr, "%ls: can't open file '%s': [Errno %d] %s\n", fprintf(stderr, "%ls: can't open file '%s': [Errno %d] %s\n",
pymain->config.program, cfilename, err, strerror(err)); pymain->config.program, cfilename, err, strerror(err));
PyMem_Free(cfilename_buffer); PyMem_RawFree(cfilename_buffer);
pymain->status = 2; pymain->status = 2;
return NULL; return NULL;
} }
......
...@@ -5158,7 +5158,8 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen) ...@@ -5158,7 +5158,8 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen)
On memory allocation failure, return NULL and write (size_t)-1 into On memory allocation failure, return NULL and write (size_t)-1 into
*error_pos (if error_pos is set). */ *error_pos (if error_pos is set). */
char* char*
_Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos) _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos,
int raw_malloc)
{ {
const Py_ssize_t max_char_size = 4; const Py_ssize_t max_char_size = 4;
Py_ssize_t len = wcslen(text); Py_ssize_t len = wcslen(text);
...@@ -5167,7 +5168,12 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos) ...@@ -5167,7 +5168,12 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
char *bytes; char *bytes;
if (len <= PY_SSIZE_T_MAX / max_char_size - 1) { if (len <= PY_SSIZE_T_MAX / max_char_size - 1) {
bytes = PyMem_Malloc((len + 1) * max_char_size); if (raw_malloc) {
bytes = PyMem_RawMalloc((len + 1) * max_char_size);
}
else {
bytes = PyMem_Malloc((len + 1) * max_char_size);
}
} }
else { else {
bytes = NULL; bytes = NULL;
...@@ -5221,7 +5227,13 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos) ...@@ -5221,7 +5227,13 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
*p++ = '\0'; *p++ = '\0';
size_t final_size = (p - bytes); size_t final_size = (p - bytes);
char *bytes2 = PyMem_Realloc(bytes, final_size); char *bytes2;
if (raw_malloc) {
bytes2 = PyMem_RawRealloc(bytes, final_size);
}
else {
bytes2 = PyMem_Realloc(bytes, final_size);
}
if (bytes2 == NULL) { if (bytes2 == NULL) {
if (error_pos != NULL) { if (error_pos != NULL) {
*error_pos = (size_t)-1; *error_pos = (size_t)-1;
...@@ -5231,7 +5243,12 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos) ...@@ -5231,7 +5243,12 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
return bytes2; return bytes2;
error: error:
PyMem_Free(bytes); if (raw_malloc) {
PyMem_RawFree(bytes);
}
else {
PyMem_Free(bytes);
}
return NULL; return NULL;
} }
......
...@@ -23,7 +23,7 @@ extern int winerror_to_errno(int); ...@@ -23,7 +23,7 @@ extern int winerror_to_errno(int);
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size,
size_t *p_wlen); size_t *p_wlen);
extern char* _Py_EncodeUTF8_surrogateescape(const wchar_t *text, extern char* _Py_EncodeUTF8_surrogateescape(const wchar_t *text,
size_t *error_pos); size_t *error_pos, int raw_malloc);
#ifdef O_CLOEXEC #ifdef O_CLOEXEC
/* Does open() support the O_CLOEXEC flag? Possible values: /* Does open() support the O_CLOEXEC flag? Possible values:
...@@ -183,7 +183,7 @@ error: ...@@ -183,7 +183,7 @@ error:
} }
static char* static char*
encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos) encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos, int raw_malloc)
{ {
char *result = NULL, *out; char *result = NULL, *out;
size_t len, i; size_t len, i;
...@@ -194,7 +194,13 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos) ...@@ -194,7 +194,13 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
len = wcslen(text); len = wcslen(text);
result = PyMem_Malloc(len + 1); /* +1 for NUL byte */ /* +1 for NUL byte */
if (raw_malloc) {
result = PyMem_RawMalloc(len + 1);
}
else {
result = PyMem_Malloc(len + 1);
}
if (result == NULL) if (result == NULL)
return NULL; return NULL;
...@@ -211,9 +217,15 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos) ...@@ -211,9 +217,15 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
*out++ = (char)(ch - 0xdc00); *out++ = (char)(ch - 0xdc00);
} }
else { else {
if (error_pos != NULL) if (error_pos != NULL) {
*error_pos = i; *error_pos = i;
PyMem_Free(result); }
if (raw_malloc) {
PyMem_RawFree(result);
}
else {
PyMem_Free(result);
}
return NULL; return NULL;
} }
} }
...@@ -423,7 +435,7 @@ Py_DecodeLocale(const char* arg, size_t *size) ...@@ -423,7 +435,7 @@ Py_DecodeLocale(const char* arg, size_t *size)
#if !defined(__APPLE__) && !defined(__ANDROID__) #if !defined(__APPLE__) && !defined(__ANDROID__)
static char* static char*
encode_locale(const wchar_t *text, size_t *error_pos) encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
{ {
const size_t len = wcslen(text); const size_t len = wcslen(text);
char *result = NULL, *bytes = NULL; char *result = NULL, *bytes = NULL;
...@@ -455,8 +467,14 @@ encode_locale(const wchar_t *text, size_t *error_pos) ...@@ -455,8 +467,14 @@ encode_locale(const wchar_t *text, size_t *error_pos)
else else
converted = wcstombs(NULL, buf, 0); converted = wcstombs(NULL, buf, 0);
if (converted == (size_t)-1) { if (converted == (size_t)-1) {
if (result != NULL) if (result != NULL) {
PyMem_Free(result); if (raw_malloc) {
PyMem_RawFree(result);
}
else {
PyMem_Free(result);
}
}
if (error_pos != NULL) if (error_pos != NULL)
*error_pos = i; *error_pos = i;
return NULL; return NULL;
...@@ -475,10 +493,16 @@ encode_locale(const wchar_t *text, size_t *error_pos) ...@@ -475,10 +493,16 @@ encode_locale(const wchar_t *text, size_t *error_pos)
} }
size += 1; /* nul byte at the end */ size += 1; /* nul byte at the end */
result = PyMem_Malloc(size); if (raw_malloc) {
result = PyMem_RawMalloc(size);
}
else {
result = PyMem_Malloc(size);
}
if (result == NULL) { if (result == NULL) {
if (error_pos != NULL) if (error_pos != NULL) {
*error_pos = (size_t)-1; *error_pos = (size_t)-1;
}
return NULL; return NULL;
} }
bytes = result; bytes = result;
...@@ -487,6 +511,28 @@ encode_locale(const wchar_t *text, size_t *error_pos) ...@@ -487,6 +511,28 @@ encode_locale(const wchar_t *text, size_t *error_pos)
} }
#endif #endif
static char*
encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
{
#if defined(__APPLE__) || defined(__ANDROID__)
return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
#else /* __APPLE__ */
if (Py_UTF8Mode == 1) {
return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
}
#ifndef MS_WINDOWS
if (force_ascii == -1)
force_ascii = check_force_ascii();
if (force_ascii)
return encode_ascii_surrogateescape(text, error_pos, raw_malloc);
#endif
return encode_current_locale(text, error_pos, raw_malloc);
#endif /* __APPLE__ or __ANDROID__ */
}
/* Encode a wide character string to the locale encoding with the /* Encode a wide character string to the locale encoding with the
surrogateescape error handler: surrogate characters in the range surrogateescape error handler: surrogate characters in the range
U+DC80..U+DCFF are converted to bytes 0x80..0xFF. U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
...@@ -502,23 +548,16 @@ encode_locale(const wchar_t *text, size_t *error_pos) ...@@ -502,23 +548,16 @@ encode_locale(const wchar_t *text, size_t *error_pos)
char* char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos) Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{ {
#if defined(__APPLE__) || defined(__ANDROID__) return encode_locale(text, error_pos, 0);
return _Py_EncodeUTF8_surrogateescape(text, error_pos); }
#else /* __APPLE__ */
if (Py_UTF8Mode == 1) {
return _Py_EncodeUTF8_surrogateescape(text, error_pos);
}
#ifndef MS_WINDOWS
if (force_ascii == -1)
force_ascii = check_force_ascii();
if (force_ascii)
return encode_ascii_surrogateescape(text, error_pos);
#endif
return encode_locale(text, error_pos); /* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
#endif /* __APPLE__ or __ANDROID__ */ instead of PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
return encode_locale(text, error_pos, 1);
} }
...@@ -1029,11 +1068,12 @@ _Py_wfopen(const wchar_t *path, const wchar_t *mode) ...@@ -1029,11 +1068,12 @@ _Py_wfopen(const wchar_t *path, const wchar_t *mode)
errno = EINVAL; errno = EINVAL;
return NULL; return NULL;
} }
cpath = Py_EncodeLocale(path, NULL); cpath = _Py_EncodeLocaleRaw(path, NULL);
if (cpath == NULL) if (cpath == NULL) {
return NULL; return NULL;
}
f = fopen(cpath, cmode); f = fopen(cpath, cmode);
PyMem_Free(cpath); PyMem_RawFree(cpath);
#else #else
f = _wfopen(path, mode); f = _wfopen(path, mode);
#endif #endif
...@@ -1341,13 +1381,13 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz) ...@@ -1341,13 +1381,13 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
int res; int res;
size_t r1; size_t r1;
cpath = Py_EncodeLocale(path, NULL); cpath = _Py_EncodeLocaleRaw(path, NULL);
if (cpath == NULL) { if (cpath == NULL) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf)); res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
PyMem_Free(cpath); PyMem_RawFree(cpath);
if (res == -1) if (res == -1)
return -1; return -1;
if (res == Py_ARRAY_LENGTH(cbuf)) { if (res == Py_ARRAY_LENGTH(cbuf)) {
...@@ -1386,13 +1426,13 @@ _Py_wrealpath(const wchar_t *path, ...@@ -1386,13 +1426,13 @@ _Py_wrealpath(const wchar_t *path,
wchar_t *wresolved_path; wchar_t *wresolved_path;
char *res; char *res;
size_t r; size_t r;
cpath = Py_EncodeLocale(path, NULL); cpath = _Py_EncodeLocaleRaw(path, NULL);
if (cpath == NULL) { if (cpath == NULL) {
errno = EINVAL; errno = EINVAL;
return NULL; return NULL;
} }
res = realpath(cpath, cresolved_path); res = realpath(cpath, cresolved_path);
PyMem_Free(cpath); PyMem_RawFree(cpath);
if (res == NULL) if (res == NULL)
return NULL; return NULL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment