Commit 09dc34fc authored by Guido van Rossum

Compare and hash unicode objects like their UTF-8 representations.

Accept Unicode characters < 256 for 'c' format.
parent f15a29f9
......@@ -5406,33 +5406,23 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
/* Compare `left` and `right` and return an int ordering result; the final
   path sets TypeError and returns -1.

   NOTE(review): this listing looks like a rendered diff whose +/- markers
   were lost.  Two bodies sit back to back in one function: one that coerces
   both arguments and calls unicode_compare(), and one that dispatches on the
   argument types without coercion.  Confirm against the commit before
   treating this as a single implementation. */
int PyUnicode_Compare(PyObject *left,
PyObject *right)
{
PyUnicodeObject *u = NULL, *v = NULL;
int result;
/* Coerce the two arguments */
u = (PyUnicodeObject *)PyUnicode_FromObject(left);
if (u == NULL)
goto onError;
v = (PyUnicodeObject *)PyUnicode_FromObject(right);
if (v == NULL)
goto onError;
/* Shortcut for empty or interned objects */
/* Both coerced pointers name the same object: release both references and
   report equality. */
if (v == u) {
Py_DECREF(u);
Py_DECREF(v);
return 0;
}
result = unicode_compare(u, v);
Py_DECREF(u);
Py_DECREF(v);
return result;
/* Coercion failed: release whichever reference was obtained (XDECREF
   tolerates NULL).  As listed, there is no return here, so control falls
   through into the type checks below. */
onError:
Py_XDECREF(u);
Py_XDECREF(v);
/* Both arguments are unicode objects: compare them directly. */
if (PyUnicode_Check(left) && PyUnicode_Check(right))
return unicode_compare((PyUnicodeObject *)left,
(PyUnicodeObject *)right);
/* One argument is a string and the other is unicode: replace the unicode
   side with the object returned by _PyUnicode_AsDefaultEncodedString()
   (presumably its default-encoded string form) and compare the two. */
if ((PyString_Check(left) && PyUnicode_Check(right)) ||
(PyUnicode_Check(left) && PyString_Check(right))) {
/* NOTE(review): the results of _PyUnicode_AsDefaultEncodedString() are
   used without a NULL check; if that call can fail, the asserts and the
   compare below would receive NULL -- confirm. */
if (PyUnicode_Check(left))
left = _PyUnicode_AsDefaultEncodedString(left, NULL);
if (PyUnicode_Check(right))
right = _PyUnicode_AsDefaultEncodedString(right, NULL);
assert(PyString_Check(left));
assert(PyString_Check(right));
return PyObject_Compare(left, right);
}
/* Any other combination of types is rejected: set TypeError naming both
   type names (each truncated to 100 characters) and return -1. */
PyErr_Format(PyExc_TypeError,
"Can't compare %.100s and %.100s",
left->ob_type->tp_name,
right->ob_type->tp_name);
return -1;
}
......@@ -5802,30 +5792,12 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
}
/* Hash a unicode object from its individual code units and cache the
   result in self->hash.

   NOTE(review): in this listing the body is not closed before a second
   unicode_hash signature follows.  This looks like one side of a rendered
   diff whose +/- markers were lost -- confirm against the commit. */
static long
unicode_hash(PyUnicodeObject *self)
{
/* Since Unicode objects compare equal to their ASCII string
counterparts, they should use the individual character values
as basis for their hash value. This is needed to assure that
strings and Unicode objects behave in the same way as
dictionary keys. */
register Py_ssize_t len;
register Py_UNICODE *p;
register long x;
/* A cached value other than -1 is returned as is. */
if (self->hash != -1)
return self->hash;
len = PyUnicode_GET_SIZE(self);
p = PyUnicode_AS_UNICODE(self);
/* Seed from the first code unit.  NOTE(review): *p is read before len is
   examined, so this relies on the buffer being readable even when the
   string is empty -- confirm. */
x = *p << 7;
/* Fold in every code unit: multiply the running value by 1000003, then XOR
   with the unit. */
while (--len >= 0)
x = (1000003*x) ^ *p++;
/* Mix in the length. */
x ^= PyUnicode_GET_SIZE(self);
/* -1 is what the cache check above treats as "not yet computed", so a
   computed -1 is remapped to -2 before being stored. */
if (x == -1)
x = -2;
self->hash = x;
return x;
unicode_hash(PyObject *self)
{
/* Since Unicode objects compare equal to their UTF-8 string
counterparts, we hash the UTF-8 string. */
PyObject *v = _PyUnicode_AsDefaultEncodedString(self, NULL);
/* Encoding can fail and yield NULL with an exception set.  Report that as
   a hash error (-1) instead of passing NULL to PyObject_Hash(), which
   would dereference it. */
if (v == NULL)
return -1;
return PyObject_Hash(v);
}
PyDoc_STRVAR(index__doc__,
......
......@@ -764,8 +764,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
char *p = va_arg(*p_va, char *);
if (PyString_Check(arg) && PyString_Size(arg) == 1)
*p = PyString_AS_STRING(arg)[0];
else if (PyUnicode_Check(arg) &&
PyUnicode_GET_SIZE(arg) == 1 &&
PyUnicode_AS_UNICODE(arg)[0] < 256)
*p = PyUnicode_AS_UNICODE(arg)[0];
else
return converterr("char", arg, msgbuf, bufsize);
return converterr("char < 256", arg, msgbuf, bufsize);
break;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment