Commit 1e36c61d authored by Victor Stinner

Create _Py_wchar2char() function, reverse of _Py_char2wchar()

 * Use _Py_wchar2char() in _wstat() and _Py_wfopen()
 * Document _Py_char2wchar()
parent 52784151
...@@ -126,12 +126,15 @@ ...@@ -126,12 +126,15 @@
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
/* _Py_Mangle is defined in compile.c */ /* _Py_Mangle is defined in compile.c */
PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name); PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name);
/* These functions live in main.c */ /* These functions live in main.c */
PyAPI_FUNC(wchar_t *) _Py_char2wchar(char *); PyAPI_FUNC(wchar_t *) _Py_char2wchar(char *);
PyAPI_FUNC(char*) _Py_wchar2char(const wchar_t *text);
PyAPI_FUNC(FILE *) _Py_wfopen(const wchar_t *path, const wchar_t *mode); PyAPI_FUNC(FILE *) _Py_wfopen(const wchar_t *path, const wchar_t *mode);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
......
...@@ -139,13 +139,16 @@ static wchar_t *lib_python = L"lib/python" VERSION; ...@@ -139,13 +139,16 @@ static wchar_t *lib_python = L"lib/python" VERSION;
static int static int
_wstat(const wchar_t* path, struct stat *buf) _wstat(const wchar_t* path, struct stat *buf)
{ {
char fname[PATH_MAX]; int err;
size_t res = wcstombs(fname, path, sizeof(fname)); char *fname;
if (res == (size_t)-1) { fname = _Py_wchar2char(path);
if (fname == NULL) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
return stat(fname, buf); err = stat(fname, buf);
PyMem_Free(fname);
return err;
} }
#endif #endif
......
...@@ -105,20 +105,21 @@ FILE * ...@@ -105,20 +105,21 @@ FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode) _Py_wfopen(const wchar_t *path, const wchar_t *mode)
{ {
#ifndef MS_WINDOWS #ifndef MS_WINDOWS
char cpath[PATH_MAX]; FILE *f;
char *cpath;
char cmode[10]; char cmode[10];
size_t r; size_t r;
r = wcstombs(cpath, path, PATH_MAX);
if (r == (size_t)-1 || r >= PATH_MAX) {
errno = EINVAL;
return NULL;
}
r = wcstombs(cmode, mode, 10); r = wcstombs(cmode, mode, 10);
if (r == (size_t)-1 || r >= 10) { if (r == (size_t)-1 || r >= 10) {
errno = EINVAL; errno = EINVAL;
return NULL; return NULL;
} }
return fopen(cpath, cmode); cpath = _Py_wchar2char(path);
if (cpath == NULL)
return NULL;
f = fopen(cpath, cmode);
PyMem_Free(cpath);
return f;
#else #else
return _wfopen(path, mode); return _wfopen(path, mode);
#endif #endif
...@@ -734,6 +735,85 @@ Py_GetArgcArgv(int *argc, wchar_t ***argv) ...@@ -734,6 +735,85 @@ Py_GetArgcArgv(int *argc, wchar_t ***argv)
} }
/* Convert a nul-terminated wide character string into a byte string using
   wcstombs(), i.e. the encoding of the current locale, combined with the
   surrogateescape error handler: every character in U+DC80..U+DCFF is
   written as the single byte 0x80..0xFF instead of being passed to
   wcstombs().

   This is the inverse operation of _Py_char2wchar().

   The returned nul-terminated buffer is allocated with PyMem_Malloc() and
   must be released by the caller with PyMem_Free(). NULL is returned when
   a character cannot be converted by wcstombs() or when the allocation
   fails. */
char*
_Py_wchar2char(const wchar_t *text)
{
    const size_t nchars = wcslen(text);
    wchar_t single[2];
    size_t pos, remaining, n;
    char *encoded, *out;

    /* Characters are converted one at a time through a two-element,
       nul-terminated scratch string so that escaped surrogates can be
       interleaved with regular wcstombs() output. */
    single[1] = 0;

    /* Pass 1: measure the output. Start at 1 to reserve room for the
       terminating nul byte. */
    remaining = 1;
    for (pos = 0; pos < nchars; pos++) {
        const wchar_t ch = text[pos];

        if (ch >= 0xdc80 && ch <= 0xdcff) {
            /* UTF-8b surrogate: always exactly one output byte */
            remaining += 1;
            continue;
        }
        single[0] = ch;
        n = wcstombs(NULL, single, 0);
        if (n == (size_t)-1)
            return NULL;
        remaining += n;
    }

    encoded = PyMem_Malloc(remaining);
    if (encoded == NULL)
        return NULL;

    /* Pass 2: emit the bytes. 'remaining' now tracks the free space left
       after 'out' and is handed to wcstombs() as its write limit. */
    out = encoded;
    for (pos = 0; pos < nchars; pos++) {
        const wchar_t ch = text[pos];

        if (ch >= 0xdc80 && ch <= 0xdcff) {
            /* UTF-8b surrogate: U+DCxx maps back to the raw byte 0xxx */
            *out++ = ch - 0xdc00;
            remaining--;
            continue;
        }
        single[0] = ch;
        n = wcstombs(out, single, remaining);
        if (n == (size_t)-1) {
            PyMem_Free(encoded);
            return NULL;
        }
        out += n;
        remaining -= n;
    }
    *out = 0;
    return encoded;
}
/* Decode a byte string from the locale encoding with the
surrogateescape error handler (undecodable bytes are decoded as characters
in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
character, escape the bytes using the surrogateescape error handler instead
of decoding them.
Use _Py_wchar2char() to encode the character string back to a byte string.
Return a pointer to a newly allocated (wide) character string (use
PyMem_Free() to free the memory), or NULL on error (conversion error or
memory error). */
wchar_t* wchar_t*
_Py_char2wchar(char* arg) _Py_char2wchar(char* arg)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment