Commit 6871f6ac authored by Marc-André Lemburg

Implement the changes proposed in patch #413333. unicode(obj) now
works just like str(obj) in that it tries __str__/tp_str on the object
in case it finds that the object is not a string or buffer.
parent c60e6f77
...@@ -429,6 +429,7 @@ verify(unicode('hello','utf-8') == u'hello') ...@@ -429,6 +429,7 @@ verify(unicode('hello','utf-8') == u'hello')
verify(unicode('hello','utf8') == u'hello') verify(unicode('hello','utf8') == u'hello')
verify(unicode('hello','latin-1') == u'hello') verify(unicode('hello','latin-1') == u'hello')
# Compatibility to str():
class String: class String:
x = '' x = ''
def __str__(self): def __str__(self):
...@@ -444,6 +445,10 @@ o.x = u'abc' ...@@ -444,6 +445,10 @@ o.x = u'abc'
verify(unicode(o) == u'abc') verify(unicode(o) == u'abc')
verify(str(o) == 'abc') verify(str(o) == 'abc')
for obj in (123, 123.45, 123L):
verify(unicode(obj) == unicode(str(obj)))
# Error handling
try: try:
u'Andr\202 x'.encode('ascii') u'Andr\202 x'.encode('ascii')
u'Andr\202 x'.encode('ascii','strict') u'Andr\202 x'.encode('ascii','strict')
......
...@@ -398,10 +398,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, ...@@ -398,10 +398,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
const char *encoding, const char *encoding,
const char *errors) const char *errors)
{ {
const char *s; const char *s = NULL;
int len; int len;
int owned = 0; int owned = 0;
PyObject *v; PyObject *v;
int reclevel;
if (obj == NULL) { if (obj == NULL) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
...@@ -409,53 +410,65 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, ...@@ -409,53 +410,65 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
} }
/* Coerce object */ /* Coerce object */
if (PyInstance_Check(obj)) { for (reclevel = 0; reclevel < 2; reclevel++) {
PyObject *func;
func = PyObject_GetAttrString(obj, "__str__"); if (PyUnicode_Check(obj)) {
if (func == NULL) { if (encoding) {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"coercing to Unicode: instance doesn't define __str__"); "decoding Unicode is not supported");
return NULL; goto onError;
}
if (PyUnicode_CheckExact(obj)) {
Py_INCREF(obj);
v = obj;
}
else {
/* For a subclass of unicode, return a true unicode object
with the same string value. */
v = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
PyUnicode_GET_SIZE(obj));
}
goto done;
} }
obj = PyEval_CallObject(func, NULL); else if (PyString_Check(obj)) {
Py_DECREF(func); s = PyString_AS_STRING(obj);
if (obj == NULL) len = PyString_GET_SIZE(obj);
return NULL; break;
owned = 1;
}
if (PyUnicode_Check(obj)) {
if (encoding) {
PyErr_SetString(PyExc_TypeError,
"decoding Unicode is not supported");
return NULL;
} }
if (PyUnicode_CheckExact(obj)) { else {
Py_INCREF(obj); PyObject *w;
v = obj;
/* Try char buffer interface */
if (PyObject_AsCharBuffer(obj, &s, &len))
PyErr_Clear();
else
break;
/* Mimic the behaviour of str(object) if everything else
fails (see PyObject_Str()); this also covers instances
which implement __str__. */
if (obj->ob_type->tp_str == NULL)
w = PyObject_Repr(obj);
else
w = (*obj->ob_type->tp_str)(obj);
if (w == NULL)
goto onError;
if (owned) {
Py_DECREF(obj);
}
obj = w;
owned = 1;
} }
else {
/* For a subclass of unicode, return a true unicode object
with the same string value. */
v = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
PyUnicode_GET_SIZE(obj));
}
goto done;
} }
else if (PyString_Check(obj)) {
s = PyString_AS_STRING(obj); if (s == NULL) {
len = PyString_GET_SIZE(obj); PyErr_Format(PyExc_TypeError,
} "coercing to Unicode: __str__ recursion limit exceeded "
else if (PyObject_AsCharBuffer(obj, &s, &len)) { "(last type: %.80s)",
/* Overwrite the error message with something more useful in obj->ob_type->tp_name);
case of a TypeError. */
if (PyErr_ExceptionMatches(PyExc_TypeError))
PyErr_Format(PyExc_TypeError,
"coercing to Unicode: need string or buffer, "
"%.80s found",
obj->ob_type->tp_name);
goto onError; goto onError;
} }
/* Convert to Unicode */ /* Convert to Unicode */
if (len == 0) { if (len == 0) {
Py_INCREF(unicode_empty); Py_INCREF(unicode_empty);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment