Commit 42b1ba31 authored by Raymond Hettinger's avatar Raymond Hettinger

* list.sort() now supports three keyword arguments: cmp, key, and reverse.

  key provides C support for the decorate-sort-undecorate pattern.
  reverse provide a stable sort of the list with the comparisions reversed.

* Amended the docs to guarantee sort stability.
parent 90f7d254
......@@ -970,7 +970,8 @@ The following operations are defined on mutable sequence types (where
{same as \code{del \var{s}[\var{s}.index(\var{x})]}}{(4)}
\lineiii{\var{s}.reverse()}
{reverses the items of \var{s} in place}{(7)}
\lineiii{\var{s}.sort(\optional{\var{cmpfunc=None}})}
\lineiii{\var{s}.sort(\optional{\var{cmp}=None\optional{, \var{key}=None
\optional{, \var{reverse}=False}}})}
{sort the items of \var{s} in place}{(7), (8), (9), (10)}
\end{tableiii}
\indexiv{operations on}{mutable}{sequence}{types}
......@@ -1021,47 +1022,38 @@ Notes:
list. To remind you that they operate by side effect, they don't return
the sorted or reversed list.
\item[(8)] The \method{sort()} method takes an optional argument
specifying a comparison function of two arguments (list items) which
should return a negative, zero or positive number depending on whether
the first argument is considered smaller than, equal to, or larger
than the second argument. Note that this slows the sorting process
down considerably; for example to sort a list in reverse order it is much
faster to call \method{sort()} followed by \method{reverse()}
than to use \method{sort()} with a comparison function that
reverses the ordering of the elements. Passing \constant{None} as the
comparison function is semantically equivalent to calling
\method{sort()} with no comparison function.
\versionchanged[Support for \code{None} as an equivalent to omitting
\var{cmpfunc} was added]{2.3}
\item[(8)] The \method{sort()} method takes optional arguments for
controlling the comparisions.
As an example of using the \var{cmpfunc} argument to the
\method{sort()} method, consider sorting a list of sequences by the
second element of that list:
\var{cmp} specifies a custom comparison function of two arguments
(list items) which should return a negative, zero or positive number
depending on whether the first argument is considered smaller than,
equal to, or larger than the second argument:
\samp{\var{cmp}=\keyword{lambda} \var{x},\var{y}:
\function{cmp}(x.lower(), y.lower())}
\var{key} specifies a function of one argument that is used to
extract a comparison key from each list element:
\samp{\var{cmp}=\function{str.lower}}
\begin{verbatim}
def mycmp(a, b):
return cmp(a[1], b[1])
\var{reverse} is a boolean value. If set to \code{True}, then the
list elements are sorted as if each comparison were reversed.
mylist.sort(mycmp)
\end{verbatim}
In general, the \var{key} and \var{reverse} conversion processes are
much faster than specifying an equivalent \var{cmp} function. This is
because \var{cmp} is called multiple times for each list element while
\var{key} and \{reverse} touch each element only once.
A more time-efficient approach for reasonably-sized data structures can
often be used:
\versionchanged[Support for \code{None} as an equivalent to omitting
\var{cmpfunc} was added]{2.3}
\begin{verbatim}
tmplist = [(x[1], x) for x in mylist]
tmplist.sort()
mylist = [x for (key, x) in tmplist]
\end{verbatim}
\versionadded[Support for \var{key} and \var{reverse} was added]{2.4}
\item[(9)] Whether the \method{sort()} method is stable is not defined by
the language (a sort is stable if it guarantees not to change the
relative order of elements that compare equal). In the C
implementation of Python, sorts were stable only by accident through
Python 2.2. The C implementation of Python 2.3 introduced a stable
\method{sort()} method, but code that intends to be portable across
implementations and versions must not rely on stability.
\item[(9)] Starting with Python 2.3, the \method{sort()} method is
guaranteed to be stable. A sort is stable if it guarantees not to
change the relative order of elements that compare equal --- this is
helpful for sorting in multiple passes (for example, sort by
department, then by salary grade).
\item[(10)] While a list is being sorted, the effect of attempting to
mutate, or even inspect, the list is undefined. The C implementation
......
......@@ -116,56 +116,112 @@ for n in sizes:
x = [e for e, i in augmented] # a stable sort of s
check("stability", x, s)
def bug453523():
global nerrors
from random import random
# If this fails, the most likely outcome is a core dump.
if verbose:
print "Testing bug 453523 -- list.sort() crasher."
class C:
def __lt__(self, other):
if L and random() < 0.75:
pop()
else:
push(3)
return random() < 0.5
L = [C() for i in range(50)]
pop = L.pop
push = L.append
try:
L.sort()
except ValueError:
pass
else:
print " Mutation during list.sort() wasn't caught."
nerrors += 1
import unittest
from test import test_support
import sys
bug453523()
#==============================================================================
def cmpNone():
global nerrors
class TestBugs(unittest.TestCase):
if verbose:
print "Testing None as a comparison function."
def test_bug453523(self):
# bug 453523 -- list.sort() crasher.
# If this fails, the most likely outcome is a core dump.
# Mutations during a list sort should raise a ValueError.
L = range(50)
random.shuffle(L)
try:
class C:
def __lt__(self, other):
if L and random.random() < 0.75:
L.pop()
else:
L.append(3)
return random.random() < 0.5
L = [C() for i in range(50)]
self.assertRaises(ValueError, L.sort)
def test_cmpNone(self):
# Testing None as a comparison function.
L = range(50)
random.shuffle(L)
L.sort(None)
except TypeError:
print " Passing None as cmpfunc failed."
nerrors += 1
else:
if L != range(50):
print " Passing None as cmpfunc failed."
nerrors += 1
self.assertEqual(L, range(50))
#==============================================================================
class TestDecorateSortUndecorate(unittest.TestCase):
def test_decorated(self):
data = 'The quick Brown fox Jumped over The lazy Dog'.split()
copy = data[:]
random.shuffle(data)
data.sort(key=str.lower)
copy.sort(cmp=lambda x,y: cmp(x.lower(), y.lower()))
def test_baddecorator(self):
data = 'The quick Brown fox Jumped over The lazy Dog'.split()
self.assertRaises(TypeError, data.sort, None, lambda x,y: 0)
def test_stability(self):
data = [(random.randrange(100), i) for i in xrange(200)]
copy = data[:]
data.sort(key=lambda (x,y): x) # sort on the random first field
copy.sort() # sort using both fields
self.assertEqual(data, copy) # should get the same result
def test_cmp_and_key_combination(self):
# Verify that the wrapper has been removed
def compare(x, y):
self.assertEqual(type(x), str)
self.assertEqual(type(x), str)
return cmp(x, y)
data = 'The quick Brown fox Jumped over The lazy Dog'.split()
data.sort(cmp=compare, key=str.lower)
def test_badcmp_with_key(self):
# Verify that the wrapper has been removed
data = 'The quick Brown fox Jumped over The lazy Dog'.split()
self.assertRaises(TypeError, data.sort, "bad", str.lower)
def test_reverse(self):
data = range(100)
random.shuffle(data)
data.sort(reverse=True)
self.assertEqual(data, range(99,-1,-1))
def test_reverse_stability(self):
data = [(random.randrange(100), i) for i in xrange(200)]
copy1 = data[:]
copy2 = data[:]
data.sort(cmp=lambda x,y: cmp(x[0],y[0]), reverse=True)
copy1.sort(cmp=lambda x,y: cmp(y[0],x[0]))
self.assertEqual(data, copy1)
copy2.sort(key=lambda x: x[0], reverse=True)
self.assertEqual(data, copy2)
#==============================================================================
def test_main(verbose=None):
test_classes = (
TestDecorateSortUndecorate,
TestBugs,
)
test_support.run_unittest(*test_classes)
# verify reference counting
if verbose and hasattr(sys, "gettotalrefcount"):
import gc
counts = [None] * 5
for i in xrange(len(counts)):
test_support.run_unittest(*test_classes)
gc.collect()
counts[i] = sys.gettotalrefcount()
print counts
if __name__ == "__main__":
test_main(verbose=True)
cmpNone()
if nerrors:
print "Test failed", nerrors
elif verbose:
print "Test passed -- no errors."
......@@ -12,6 +12,15 @@ What's New in Python 2.4 alpha 1?
Core and builtins
-----------------
- list.sort() now supports three keyword arguments: cmp, key, and reverse.
The key argument can be a function of one argument that extracts a
comparison key from the original record: mylist.sort(key=str.lower).
The reverse argument is a boolean value and if True will change the
sort order as if the comparison arguments were reversed. In addition,
the documentation has been amended to provide a guarantee that all sorts
starting with Py2.3 are guaranteed to be stable (the relative order of
records with equal keys is unchanged).
- Added test whether wchar_t is signed or not. A signed wchar_t is not
usable as internal unicode type base for Py_UNICODE since the
unicode implementation assumes an unsigned type.
......
......@@ -1656,13 +1656,186 @@ merge_compute_minrun(int n)
return n + r;
}
/* Special wrapper to support stable sorting using the decorate-sort-undecorate
pattern. Holds a key which is used for comparisions and the original record
which is returned during the undecorate phase. By exposing only the key
during comparisons, the underlying sort stability characteristics are left
unchanged. Also, if a custom comparison function is used, it will only see
the key instead of a full record. */
typedef struct {
PyObject_HEAD
PyObject *key;
PyObject *value;
} sortwrapperobject;
static PyTypeObject sortwrapper_type;
static PyObject *
sortwrapper_richcompare(sortwrapperobject *a, sortwrapperobject *b, int op)
{
if (!PyObject_TypeCheck(b, &sortwrapper_type)) {
PyErr_SetString(PyExc_TypeError,
"expected a sortwrapperobject");
return NULL;
}
return PyObject_RichCompare(a->key, b->key, op);
}
static void
sortwrapper_dealloc(sortwrapperobject *so)
{
Py_XDECREF(so->key);
Py_XDECREF(so->value);
PyObject_Del(so);
}
PyDoc_STRVAR(sortwrapper_doc, "Object wrapper with a custom sort key.");
static PyTypeObject sortwrapper_type = {
PyObject_HEAD_INIT(&PyType_Type)
0, /* ob_size */
"sortwrapper", /* tp_name */
sizeof(sortwrapperobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)sortwrapper_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT |
Py_TPFLAGS_HAVE_RICHCOMPARE, /* tp_flags */
sortwrapper_doc, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
(richcmpfunc)sortwrapper_richcompare, /* tp_richcompare */
};
/* Returns a new reference to a sortwrapper.
Consumes the references to the two underlying objects. */
static PyObject *
build_sortwrapper(PyObject *key, PyObject *value)
{
sortwrapperobject *so;
so = PyObject_New(sortwrapperobject, &sortwrapper_type);
if (so == NULL)
return NULL;
so->key = key;
so->value = value;
return (PyObject *)so;
}
/* Returns a new reference to the value underlying the wrapper. */
static PyObject *
sortwrapper_getvalue(PyObject *so)
{
PyObject *value;
if (!PyObject_TypeCheck(so, &sortwrapper_type)) {
PyErr_SetString(PyExc_TypeError,
"expected a sortwrapperobject");
return NULL;
}
value = ((sortwrapperobject *)so)->value;
Py_INCREF(value);
return value;
}
/* Wrapper for user specified cmp functions in combination with a
specified key function. Makes sure the cmp function is presented
with the actual key instead of the sortwrapper */
typedef struct {
PyObject_HEAD
PyObject *func;
} cmpwrapperobject;
static void
cmpwrapper_dealloc(cmpwrapperobject *co)
{
Py_XDECREF(co->func);
PyObject_Del(co);
}
static PyObject *
cmpwrapper_call(cmpwrapperobject *co, PyObject *args, PyObject *kwds)
{
PyObject *x, *y, *xx, *yy;
if (!PyArg_UnpackTuple(args, "", 2, 2, &x, &y))
return NULL;
if (!PyObject_TypeCheck(x, &sortwrapper_type) ||
!PyObject_TypeCheck(x, &sortwrapper_type)) {
PyErr_SetString(PyExc_TypeError,
"expected a sortwrapperobject");
return NULL;
}
xx = ((sortwrapperobject *)x)->key;
yy = ((sortwrapperobject *)y)->key;
return PyObject_CallFunctionObjArgs(co->func, xx, yy, NULL);
}
PyDoc_STRVAR(cmpwrapper_doc, "cmp() wrapper for sort with custom keys.");
static PyTypeObject cmpwrapper_type = {
PyObject_HEAD_INIT(&PyType_Type)
0, /* ob_size */
"cmpwrapper", /* tp_name */
sizeof(cmpwrapperobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)cmpwrapper_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
(ternaryfunc)cmpwrapper_call, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
cmpwrapper_doc, /* tp_doc */
};
static PyObject *
build_cmpwrapper(PyObject *cmpfunc)
{
cmpwrapperobject *co;
co = PyObject_New(cmpwrapperobject, &cmpwrapper_type);
if (co == NULL)
return NULL;
Py_INCREF(cmpfunc);
co->func = cmpfunc;
return (PyObject *)co;
}
/* An adaptive, stable, natural mergesort. See listsort.txt.
* Returns Py_None on success, NULL on error. Even in case of error, the
* list will be some permutation of its input state (nothing is lost or
* duplicated).
*/
static PyObject *
listsort(PyListObject *self, PyObject *args)
listsort(PyListObject *self, PyObject *args, PyObject *kwds)
{
MergeState ms;
PyObject **lo, **hi;
......@@ -1673,14 +1846,48 @@ listsort(PyListObject *self, PyObject *args)
PyObject **empty_ob_item;
PyObject *compare = NULL;
PyObject *result = NULL; /* guilty until proved innocent */
int reverse = 0;
PyObject *keyfunc = NULL;
int i, n;
PyObject *key, *value, *kvpair;
static char *kwlist[] = {"cmp", "key", "reverse", 0};
assert(self != NULL);
assert (PyList_Check(self));
if (args != NULL) {
if (!PyArg_UnpackTuple(args, "sort", 0, 1, &compare))
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOi:sort",
kwlist, &compare, &keyfunc, &reverse))
return NULL;
}
if (compare == Py_None)
compare = NULL;
if (keyfunc == Py_None)
keyfunc = NULL;
if (compare != NULL && keyfunc != NULL) {
compare = build_cmpwrapper(compare);
if (compare == NULL)
goto dsu_fail;
} else
Py_XINCREF(compare);
if (keyfunc != NULL) {
n = PyList_GET_SIZE(self);
for (i=0 ; i<n ; i++) {
value = PyList_GET_ITEM(self, i);
key = PyObject_CallFunctionObjArgs(keyfunc, value, NULL);
if (key == NULL)
goto dsu_fail;
kvpair = build_sortwrapper(key, value);
if (kvpair == NULL)
goto dsu_fail;
PyList_SET_ITEM(self, i, kvpair);
}
}
/* Reverse sort stability achieved by initialially reversing the list,
applying a stable forward sort, then reversing the final result. */
if (reverse && self->ob_size > 1)
reverse_slice(self->ob_item, self->ob_item + self->ob_size);
merge_init(&ms, compare);
......@@ -1758,6 +1965,21 @@ fail:
self->ob_size = saved_ob_size;
self->ob_item = saved_ob_item;
merge_freemem(&ms);
if (keyfunc != NULL) {
for (i=0 ; i<n ; i++) {
kvpair = PyList_GET_ITEM(self, i);
value = sortwrapper_getvalue(kvpair);
PyList_SET_ITEM(self, i, value);
Py_DECREF(kvpair);
}
}
if (reverse && self->ob_size > 1)
reverse_slice(self->ob_item, self->ob_item + self->ob_size);
dsu_fail:
Py_XDECREF(compare);
Py_XINCREF(result);
return result;
}
......@@ -1771,7 +1993,7 @@ PyList_Sort(PyObject *v)
PyErr_BadInternalCall();
return -1;
}
v = listsort((PyListObject *)v, (PyObject *)NULL);
v = listsort((PyListObject *)v, (PyObject *)NULL, (PyObject *)NULL);
if (v == NULL)
return -1;
Py_DECREF(v);
......@@ -2111,7 +2333,8 @@ PyDoc_STRVAR(count_doc,
PyDoc_STRVAR(reverse_doc,
"L.reverse() -- reverse *IN PLACE*");
PyDoc_STRVAR(sort_doc,
"L.sort(cmpfunc=None) -- stable sort *IN PLACE*; cmpfunc(x, y) -> -1, 0, 1");
"L.sort(cmp=None, key=None, reverse=False) -- stable sort *IN PLACE*;\n\
cmp(x, y) -> -1, 0, 1");
static PyMethodDef list_methods[] = {
{"append", (PyCFunction)listappend, METH_O, append_doc},
......@@ -2122,7 +2345,7 @@ static PyMethodDef list_methods[] = {
{"index", (PyCFunction)listindex, METH_VARARGS, index_doc},
{"count", (PyCFunction)listcount, METH_O, count_doc},
{"reverse", (PyCFunction)listreverse, METH_NOARGS, reverse_doc},
{"sort", (PyCFunction)listsort, METH_VARARGS, sort_doc},
{"sort", (PyCFunction)listsort, METH_VARARGS | METH_KEYWORDS, sort_doc},
{NULL, NULL} /* sentinel */
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment