Commit 24beb313 authored by Martin v. Löwis

Use wchar_t functions in _locale module.

parent 9e1a7a6a
...@@ -9,6 +9,10 @@ What's New in Python 3.0a4? ...@@ -9,6 +9,10 @@ What's New in Python 3.0a4?
*Release date: XX-XXX-2008* *Release date: XX-XXX-2008*
Extension Modules
-----------------
- Use wchar_t functions in _locale module.
What's New in Python 3.0a3? What's New in Python 3.0a3?
......
/*********************************************************** /***********************************************************
Copyright (C) 1997, 2002, 2003, 2007 Martin von Loewis Copyright (C) 1997, 2002, 2003, 2007, 2008 Martin von Loewis
Permission to use, copy, modify, and distribute this software and its Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted, documentation for any purpose and without fee is hereby granted,
...@@ -45,6 +45,35 @@ PyDoc_STRVAR(locale__doc__, "Support for POSIX locales."); ...@@ -45,6 +45,35 @@ PyDoc_STRVAR(locale__doc__, "Support for POSIX locales.");
static PyObject *Error; static PyObject *Error;
/* Convert a char* to a Unicode object according to the current locale.

   s is a NUL-terminated multibyte string in the encoding of the current
   LC_CTYPE locale.  Returns a new reference to a Unicode object, or NULL
   with an exception set (ValueError if the bytes cannot be decoded,
   MemoryError if the temporary buffer cannot be allocated). */
static PyObject*
str2uni(const char* s)
{
    /* Number of wide characters required, not counting the terminator. */
    size_t needed = mbstowcs(NULL, s, 0);
    size_t res1;
    wchar_t smallbuf[30];
    wchar_t *dest;
    PyObject *res2;
    if (needed == (size_t)-1) {
        PyErr_SetString(PyExc_ValueError, "Cannot convert byte to string");
        return NULL;
    }
    /* Compare against the element count of smallbuf, not its size in
       bytes: `needed` counts wide characters, and one extra slot is
       required for the terminating L'\0'. */
    if (needed < sizeof(smallbuf) / sizeof(smallbuf[0]))
        dest = smallbuf;
    else {
        /* Guard the multiplication below against size_t overflow. */
        if (needed + 1 > (size_t)-1 / sizeof(wchar_t))
            return PyErr_NoMemory();
        /* PyMem_Malloc takes a byte count, so scale by sizeof(wchar_t). */
        dest = PyMem_Malloc((needed + 1) * sizeof(wchar_t));
        if (!dest)
            return PyErr_NoMemory();
    }
    /* This shouldn't fail now, but do not hand a bogus length to
       PyUnicode_FromWideChar if it does (asserts vanish with NDEBUG). */
    res1 = mbstowcs(dest, s, needed + 1);
    if (res1 == (size_t)-1) {
        if (dest != smallbuf)
            PyMem_Free(dest);
        PyErr_SetString(PyExc_ValueError, "Cannot convert byte to string");
        return NULL;
    }
    assert(res1 == needed);
    res2 = PyUnicode_FromWideChar(dest, res1);
    if (dest != smallbuf)
        PyMem_Free(dest);
    return res2;
}
/* support functions for formatting floating point numbers */ /* support functions for formatting floating point numbers */
PyDoc_STRVAR(setlocale__doc__, PyDoc_STRVAR(setlocale__doc__,
...@@ -107,7 +136,7 @@ PyLocale_setlocale(PyObject* self, PyObject* args) ...@@ -107,7 +136,7 @@ PyLocale_setlocale(PyObject* self, PyObject* args)
PyErr_SetString(Error, "unsupported locale setting"); PyErr_SetString(Error, "unsupported locale setting");
return NULL; return NULL;
} }
result_object = PyUnicode_FromString(result); result_object = str2uni(result);
if (!result_object) if (!result_object)
return NULL; return NULL;
} else { } else {
...@@ -117,7 +146,7 @@ PyLocale_setlocale(PyObject* self, PyObject* args) ...@@ -117,7 +146,7 @@ PyLocale_setlocale(PyObject* self, PyObject* args)
PyErr_SetString(Error, "locale query failed"); PyErr_SetString(Error, "locale query failed");
return NULL; return NULL;
} }
result_object = PyUnicode_FromString(result); result_object = str2uni(result);
} }
return result_object; return result_object;
} }
...@@ -143,7 +172,7 @@ PyLocale_localeconv(PyObject* self) ...@@ -143,7 +172,7 @@ PyLocale_localeconv(PyObject* self)
involved herein */ involved herein */
#define RESULT_STRING(s)\ #define RESULT_STRING(s)\
x = PyUnicode_DecodeUnicodeEscape(l->s, strlen(l->s), "strict");\ x = str2uni(l->s); \
if (!x) goto failed;\ if (!x) goto failed;\
PyDict_SetItemString(result, #s, x);\ PyDict_SetItemString(result, #s, x);\
Py_XDECREF(x) Py_XDECREF(x)
...@@ -191,29 +220,19 @@ PyLocale_localeconv(PyObject* self) ...@@ -191,29 +220,19 @@ PyLocale_localeconv(PyObject* self)
return NULL; return NULL;
} }
#if defined(HAVE_WCSCOLL)
PyDoc_STRVAR(strcoll__doc__, PyDoc_STRVAR(strcoll__doc__,
"string,string -> int. Compares two strings according to the locale."); "string,string -> int. Compares two strings according to the locale.");
static PyObject* static PyObject*
PyLocale_strcoll(PyObject* self, PyObject* args) PyLocale_strcoll(PyObject* self, PyObject* args)
{ {
#if !defined(HAVE_WCSCOLL)
char *s1,*s2;
if (!PyArg_ParseTuple(args, "ss:strcoll", &s1, &s2))
return NULL;
return PyLong_FromLong(strcoll(s1, s2));
#else
PyObject *os1, *os2, *result = NULL; PyObject *os1, *os2, *result = NULL;
wchar_t *ws1 = NULL, *ws2 = NULL; wchar_t *ws1 = NULL, *ws2 = NULL;
int len1, len2; Py_ssize_t len1, len2;
if (!PyArg_UnpackTuple(args, "strcoll", 2, 2, &os1, &os2)) if (!PyArg_ParseTuple(args, "UU:strcoll", &os1, &os2))
return NULL; return NULL;
/* Both arguments must be unicode, or it's an error. */
if (!PyUnicode_Check(os1) || !PyUnicode_Check(os2)) {
PyErr_SetString(PyExc_ValueError, "strcoll arguments must be strings");
}
/* Convert the unicode strings to wchar[]. */ /* Convert the unicode strings to wchar[]. */
len1 = PyUnicode_GET_SIZE(os1) + 1; len1 = PyUnicode_GET_SIZE(os1) + 1;
ws1 = PyMem_MALLOC(len1 * sizeof(wchar_t)); ws1 = PyMem_MALLOC(len1 * sizeof(wchar_t));
...@@ -240,40 +259,62 @@ PyLocale_strcoll(PyObject* self, PyObject* args) ...@@ -240,40 +259,62 @@ PyLocale_strcoll(PyObject* self, PyObject* args)
if (ws1) PyMem_FREE(ws1); if (ws1) PyMem_FREE(ws1);
if (ws2) PyMem_FREE(ws2); if (ws2) PyMem_FREE(ws2);
return result; return result;
#endif
} }
#endif
#ifdef HAVE_WCSXFRM
PyDoc_STRVAR(strxfrm__doc__, PyDoc_STRVAR(strxfrm__doc__,
"string -> string. Returns a string that behaves for cmp locale-aware."); "string -> string. Returns a string that behaves for cmp locale-aware.");
static PyObject* static PyObject*
PyLocale_strxfrm(PyObject* self, PyObject* args) PyLocale_strxfrm(PyObject* self, PyObject* args)
{ {
char *s, *buf; Py_UNICODE *s0;
Py_ssize_t n0;
wchar_t *s, *buf = NULL;
size_t n1, n2; size_t n1, n2;
PyObject *result; PyObject *result = NULL;
Py_ssize_t i;
if (!PyArg_ParseTuple(args, "s:strxfrm", &s)) if (!PyArg_ParseTuple(args, "u#:strxfrm", &s0, &n0))
return NULL; return NULL;
#ifdef HAVE_USABLE_WCHAR_T
s = s0;
#else
s = PyMem_Malloc(n0+1);
if (!s)
return PyErr_NoMemory();
for (i=0; i<=n0; i++)
s[i] = s0[i];
#endif
/* assume no change in size, first */ /* assume no change in size, first */
n1 = strlen(s) + 1; n1 = wcslen(s) + 1;
buf = PyMem_Malloc(n1); buf = PyMem_Malloc(n1);
if (!buf) if (!buf) {
return PyErr_NoMemory(); PyErr_NoMemory();
n2 = strxfrm(buf, s, n1) + 1; goto exit;
if (n2 > n1) { }
n2 = wcsxfrm(buf, s, n1);
if (n2 >= n1) {
/* more space needed */ /* more space needed */
buf = PyMem_Realloc(buf, n2); buf = PyMem_Realloc(buf, n2+1);
if (!buf) if (!buf) {
return PyErr_NoMemory(); PyErr_NoMemory();
strxfrm(buf, s, n2); goto exit;
} }
result = PyUnicode_FromString(buf); n2 = wcsxfrm(buf, s, n2);
PyMem_Free(buf); }
result = PyUnicode_FromWideChar(buf, n2);
exit:
if (buf) PyMem_Free(buf);
#ifdef HAVE_USABLE_WCHAR_T
PyMem_Free(s);
#endif
return result; return result;
} }
#endif
#if defined(MS_WINDOWS) #if defined(MS_WINDOWS)
static PyObject* static PyObject*
...@@ -472,9 +513,7 @@ PyLocale_nl_langinfo(PyObject* self, PyObject* args) ...@@ -472,9 +513,7 @@ PyLocale_nl_langinfo(PyObject* self, PyObject* args)
instead of an empty string for nl_langinfo(ERA). */ instead of an empty string for nl_langinfo(ERA). */
const char *result = nl_langinfo(item); const char *result = nl_langinfo(item);
result = result != NULL ? result : ""; result = result != NULL ? result : "";
/* XXX may have to convert this to wcs first. */ return str2uni(result);
return PyUnicode_DecodeUnicodeEscape(result, strlen(result),
"strict");
} }
PyErr_SetString(PyExc_ValueError, "unsupported langinfo constant"); PyErr_SetString(PyExc_ValueError, "unsupported langinfo constant");
return NULL; return NULL;
...@@ -493,7 +532,7 @@ PyIntl_gettext(PyObject* self, PyObject *args) ...@@ -493,7 +532,7 @@ PyIntl_gettext(PyObject* self, PyObject *args)
char *in; char *in;
if (!PyArg_ParseTuple(args, "z", &in)) if (!PyArg_ParseTuple(args, "z", &in))
return 0; return 0;
return PyUnicode_FromString(gettext(in)); return str2uni(gettext(in));
} }
PyDoc_STRVAR(dgettext__doc__, PyDoc_STRVAR(dgettext__doc__,
...@@ -506,7 +545,7 @@ PyIntl_dgettext(PyObject* self, PyObject *args) ...@@ -506,7 +545,7 @@ PyIntl_dgettext(PyObject* self, PyObject *args)
char *domain, *in; char *domain, *in;
if (!PyArg_ParseTuple(args, "zz", &domain, &in)) if (!PyArg_ParseTuple(args, "zz", &domain, &in))
return 0; return 0;
return PyUnicode_FromString(dgettext(domain, in)); return str2uni(dgettext(domain, in));
} }
PyDoc_STRVAR(dcgettext__doc__, PyDoc_STRVAR(dcgettext__doc__,
...@@ -520,7 +559,7 @@ PyIntl_dcgettext(PyObject *self, PyObject *args) ...@@ -520,7 +559,7 @@ PyIntl_dcgettext(PyObject *self, PyObject *args)
int category; int category;
if (!PyArg_ParseTuple(args, "zzi", &domain, &msgid, &category)) if (!PyArg_ParseTuple(args, "zzi", &domain, &msgid, &category))
return 0; return 0;
return PyUnicode_FromString(dcgettext(domain,msgid,category)); return str2uni(dcgettext(domain,msgid,category));
} }
PyDoc_STRVAR(textdomain__doc__, PyDoc_STRVAR(textdomain__doc__,
...@@ -538,7 +577,7 @@ PyIntl_textdomain(PyObject* self, PyObject* args) ...@@ -538,7 +577,7 @@ PyIntl_textdomain(PyObject* self, PyObject* args)
PyErr_SetFromErrno(PyExc_OSError); PyErr_SetFromErrno(PyExc_OSError);
return NULL; return NULL;
} }
return PyUnicode_FromString(domain); return str2uni(domain);
} }
PyDoc_STRVAR(bindtextdomain__doc__, PyDoc_STRVAR(bindtextdomain__doc__,
...@@ -556,7 +595,7 @@ PyIntl_bindtextdomain(PyObject* self,PyObject*args) ...@@ -556,7 +595,7 @@ PyIntl_bindtextdomain(PyObject* self,PyObject*args)
PyErr_SetFromErrno(PyExc_OSError); PyErr_SetFromErrno(PyExc_OSError);
return NULL; return NULL;
} }
return PyUnicode_FromString(dirname); return str2uni(dirname);
} }
#ifdef HAVE_BIND_TEXTDOMAIN_CODESET #ifdef HAVE_BIND_TEXTDOMAIN_CODESET
...@@ -572,7 +611,7 @@ PyIntl_bind_textdomain_codeset(PyObject* self,PyObject*args) ...@@ -572,7 +611,7 @@ PyIntl_bind_textdomain_codeset(PyObject* self,PyObject*args)
return NULL; return NULL;
codeset = bind_textdomain_codeset(domain, codeset); codeset = bind_textdomain_codeset(domain, codeset);
if (codeset) if (codeset)
return PyUnicode_FromString(codeset); return str2uni(codeset);
Py_RETURN_NONE; Py_RETURN_NONE;
} }
#endif #endif
...@@ -584,10 +623,14 @@ static struct PyMethodDef PyLocale_Methods[] = { ...@@ -584,10 +623,14 @@ static struct PyMethodDef PyLocale_Methods[] = {
METH_VARARGS, setlocale__doc__}, METH_VARARGS, setlocale__doc__},
{"localeconv", (PyCFunction) PyLocale_localeconv, {"localeconv", (PyCFunction) PyLocale_localeconv,
METH_NOARGS, localeconv__doc__}, METH_NOARGS, localeconv__doc__},
#ifdef HAVE_WCSCOLL
{"strcoll", (PyCFunction) PyLocale_strcoll, {"strcoll", (PyCFunction) PyLocale_strcoll,
METH_VARARGS, strcoll__doc__}, METH_VARARGS, strcoll__doc__},
#endif
#ifdef HAVE_WCSXFRM
{"strxfrm", (PyCFunction) PyLocale_strxfrm, {"strxfrm", (PyCFunction) PyLocale_strxfrm,
METH_VARARGS, strxfrm__doc__}, METH_VARARGS, strxfrm__doc__},
#endif
#if defined(MS_WINDOWS) || defined(__APPLE__) #if defined(MS_WINDOWS) || defined(__APPLE__)
{"_getdefaultlocale", (PyCFunction) PyLocale_getdefaultlocale, METH_NOARGS}, {"_getdefaultlocale", (PyCFunction) PyLocale_getdefaultlocale, METH_NOARGS},
#endif #endif
......
#! /bin/sh #! /bin/sh
# From configure.in Revision: 60787 . # From configure.in Revision: 61238 .
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.61 for python 3.0. # Generated by GNU Autoconf 2.61 for python 3.0.
# #
...@@ -15713,6 +15713,7 @@ echo "${ECHO_T}MACHDEP_OBJS" >&6; } ...@@ -15713,6 +15713,7 @@ echo "${ECHO_T}MACHDEP_OBJS" >&6; }
for ac_func in alarm bind_textdomain_codeset chown clock confstr \ for ac_func in alarm bind_textdomain_codeset chown clock confstr \
...@@ -15726,7 +15727,7 @@ for ac_func in alarm bind_textdomain_codeset chown clock confstr \ ...@@ -15726,7 +15727,7 @@ for ac_func in alarm bind_textdomain_codeset chown clock confstr \
setlocale setregid setreuid setsid setpgid setpgrp setuid setvbuf snprintf \ setlocale setregid setreuid setsid setpgid setpgrp setuid setvbuf snprintf \
sigaction siginterrupt sigrelse strftime strlcpy \ sigaction siginterrupt sigrelse strftime strlcpy \
sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \ sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \
truncate uname unsetenv utimes waitpid wait3 wait4 wcscoll _getpty truncate uname unsetenv utimes waitpid wait3 wait4 wcscoll wcsxfrm _getpty
do do
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ echo "$as_me:$LINENO: checking for $ac_func" >&5 { echo "$as_me:$LINENO: checking for $ac_func" >&5
......
...@@ -2284,7 +2284,7 @@ AC_CHECK_FUNCS(alarm bind_textdomain_codeset chown clock confstr \ ...@@ -2284,7 +2284,7 @@ AC_CHECK_FUNCS(alarm bind_textdomain_codeset chown clock confstr \
setlocale setregid setreuid setsid setpgid setpgrp setuid setvbuf snprintf \ setlocale setregid setreuid setsid setpgid setpgrp setuid setvbuf snprintf \
sigaction siginterrupt sigrelse strftime strlcpy \ sigaction siginterrupt sigrelse strftime strlcpy \
sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \ sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \
truncate uname unsetenv utimes waitpid wait3 wait4 wcscoll _getpty) truncate uname unsetenv utimes waitpid wait3 wait4 wcscoll wcsxfrm _getpty)
# For some functions, having a definition is not sufficient, since # For some functions, having a definition is not sufficient, since
# we want to take their address. # we want to take their address.
......
...@@ -776,6 +776,9 @@ ...@@ -776,6 +776,9 @@
/* Define to 1 if you have the `wcscoll' function. */ /* Define to 1 if you have the `wcscoll' function. */
#undef HAVE_WCSCOLL #undef HAVE_WCSCOLL
/* Define to 1 if you have the `wcsxfrm' function. */
#undef HAVE_WCSXFRM
/* Define if tzset() actually switches the local timezone in a meaningful way. /* Define if tzset() actually switches the local timezone in a meaningful way.
*/ */
#undef HAVE_WORKING_TZSET #undef HAVE_WORKING_TZSET
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment