Commit 42b1ba31 authored by Raymond Hettinger's avatar Raymond Hettinger

* list.sort() now supports three keyword arguments: cmp, key, and reverse.

  key provides C support for the decorate-sort-undecorate pattern.
  reverse provide a stable sort of the list with the comparisions reversed.

* Amended the docs to guarantee sort stability.
parent 90f7d254
...@@ -970,7 +970,8 @@ The following operations are defined on mutable sequence types (where ...@@ -970,7 +970,8 @@ The following operations are defined on mutable sequence types (where
{same as \code{del \var{s}[\var{s}.index(\var{x})]}}{(4)} {same as \code{del \var{s}[\var{s}.index(\var{x})]}}{(4)}
\lineiii{\var{s}.reverse()} \lineiii{\var{s}.reverse()}
{reverses the items of \var{s} in place}{(7)} {reverses the items of \var{s} in place}{(7)}
\lineiii{\var{s}.sort(\optional{\var{cmpfunc=None}})} \lineiii{\var{s}.sort(\optional{\var{cmp}=None\optional{, \var{key}=None
\optional{, \var{reverse}=False}}})}
{sort the items of \var{s} in place}{(7), (8), (9), (10)} {sort the items of \var{s} in place}{(7), (8), (9), (10)}
\end{tableiii} \end{tableiii}
\indexiv{operations on}{mutable}{sequence}{types} \indexiv{operations on}{mutable}{sequence}{types}
...@@ -1021,47 +1022,38 @@ Notes: ...@@ -1021,47 +1022,38 @@ Notes:
list. To remind you that they operate by side effect, they don't return list. To remind you that they operate by side effect, they don't return
the sorted or reversed list. the sorted or reversed list.
\item[(8)] The \method{sort()} method takes an optional argument \item[(8)] The \method{sort()} method takes optional arguments for
specifying a comparison function of two arguments (list items) which controlling the comparisons.
should return a negative, zero or positive number depending on whether
the first argument is considered smaller than, equal to, or larger
than the second argument. Note that this slows the sorting process
down considerably; for example to sort a list in reverse order it is much
faster to call \method{sort()} followed by \method{reverse()}
than to use \method{sort()} with a comparison function that
reverses the ordering of the elements. Passing \constant{None} as the
comparison function is semantically equivalent to calling
\method{sort()} with no comparison function.
\versionchanged[Support for \code{None} as an equivalent to omitting
\var{cmpfunc} was added]{2.3}
As an example of using the \var{cmpfunc} argument to the \var{cmp} specifies a custom comparison function of two arguments
\method{sort()} method, consider sorting a list of sequences by the (list items) which should return a negative, zero or positive number
second element of that list: depending on whether the first argument is considered smaller than,
equal to, or larger than the second argument:
\samp{\var{cmp}=\keyword{lambda} \var{x},\var{y}:
\function{cmp}(x.lower(), y.lower())}
\var{key} specifies a function of one argument that is used to
extract a comparison key from each list element:
\samp{\var{key}=\function{str.lower}}
\begin{verbatim} \var{reverse} is a boolean value. If set to \code{True}, then the
def mycmp(a, b): list elements are sorted as if each comparison were reversed.
return cmp(a[1], b[1])
mylist.sort(mycmp) In general, the \var{key} and \var{reverse} conversion processes are
\end{verbatim} much faster than specifying an equivalent \var{cmp} function. This is
because \var{cmp} is called multiple times for each list element while
\var{key} and \var{reverse} touch each element only once.
A more time-efficient approach for reasonably-sized data structures can \versionchanged[Support for \code{None} as an equivalent to omitting
often be used: \var{cmpfunc} was added]{2.3}
\begin{verbatim} \versionadded[Support for \var{key} and \var{reverse} was added]{2.4}
tmplist = [(x[1], x) for x in mylist]
tmplist.sort()
mylist = [x for (key, x) in tmplist]
\end{verbatim}
\item[(9)] Whether the \method{sort()} method is stable is not defined by \item[(9)] Starting with Python 2.3, the \method{sort()} method is
the language (a sort is stable if it guarantees not to change the guaranteed to be stable. A sort is stable if it guarantees not to
relative order of elements that compare equal). In the C change the relative order of elements that compare equal --- this is
implementation of Python, sorts were stable only by accident through helpful for sorting in multiple passes (for example, sort by
Python 2.2. The C implementation of Python 2.3 introduced a stable department, then by salary grade).
\method{sort()} method, but code that intends to be portable across
implementations and versions must not rely on stability.
\item[(10)] While a list is being sorted, the effect of attempting to \item[(10)] While a list is being sorted, the effect of attempting to
mutate, or even inspect, the list is undefined. The C implementation mutate, or even inspect, the list is undefined. The C implementation
......
...@@ -116,56 +116,112 @@ for n in sizes: ...@@ -116,56 +116,112 @@ for n in sizes:
x = [e for e, i in augmented] # a stable sort of s x = [e for e, i in augmented] # a stable sort of s
check("stability", x, s) check("stability", x, s)
def bug453523():
global nerrors
from random import random
# If this fails, the most likely outcome is a core dump. import unittest
if verbose: from test import test_support
print "Testing bug 453523 -- list.sort() crasher." import sys
class C:
def __lt__(self, other):
if L and random() < 0.75:
pop()
else:
push(3)
return random() < 0.5
L = [C() for i in range(50)]
pop = L.pop
push = L.append
try:
L.sort()
except ValueError:
pass
else:
print " Mutation during list.sort() wasn't caught."
nerrors += 1
bug453523() #==============================================================================
def cmpNone(): class TestBugs(unittest.TestCase):
global nerrors
if verbose: def test_bug453523(self):
print "Testing None as a comparison function." # bug 453523 -- list.sort() crasher.
# If this fails, the most likely outcome is a core dump.
# Mutations during a list sort should raise a ValueError.
L = range(50) class C:
random.shuffle(L) def __lt__(self, other):
try: if L and random.random() < 0.75:
L.pop()
else:
L.append(3)
return random.random() < 0.5
L = [C() for i in range(50)]
self.assertRaises(ValueError, L.sort)
def test_cmpNone(self):
# Testing None as a comparison function.
L = range(50)
random.shuffle(L)
L.sort(None) L.sort(None)
except TypeError: self.assertEqual(L, range(50))
print " Passing None as cmpfunc failed."
nerrors += 1 #==============================================================================
else:
if L != range(50): class TestDecorateSortUndecorate(unittest.TestCase):
print " Passing None as cmpfunc failed."
nerrors += 1 def test_decorated(self):
data = 'The quick Brown fox Jumped over The lazy Dog'.split()
copy = data[:]
random.shuffle(data)
data.sort(key=str.lower)
copy.sort(cmp=lambda x,y: cmp(x.lower(), y.lower()))
def test_baddecorator(self):
data = 'The quick Brown fox Jumped over The lazy Dog'.split()
self.assertRaises(TypeError, data.sort, None, lambda x,y: 0)
def test_stability(self):
data = [(random.randrange(100), i) for i in xrange(200)]
copy = data[:]
data.sort(key=lambda (x,y): x) # sort on the random first field
copy.sort() # sort using both fields
self.assertEqual(data, copy) # should get the same result
def test_cmp_and_key_combination(self):
    # When both cmp and key are supplied, the cmp function must be handed
    # the extracted keys (plain strings here) rather than the internal
    # wrapper objects that pair each key with its original value.
    def compare(x, y):
        # Check BOTH operands; the second assertion previously re-tested x,
        # so a wrapper leaking through as y would have gone unnoticed.
        self.assertEqual(type(x), str)
        self.assertEqual(type(y), str)
        return cmp(x, y)
    data = 'The quick Brown fox Jumped over The lazy Dog'.split()
    data.sort(cmp=compare, key=str.lower)
def test_badcmp_with_key(self):
# Verify that the wrapper has been removed
data = 'The quick Brown fox Jumped over The lazy Dog'.split()
self.assertRaises(TypeError, data.sort, "bad", str.lower)
def test_reverse(self):
data = range(100)
random.shuffle(data)
data.sort(reverse=True)
self.assertEqual(data, range(99,-1,-1))
def test_reverse_stability(self):
    # reverse=True must behave as if every comparison were reversed, for
    # both the cmp= and the key= spelling.  Only the first field of each
    # pair is compared; the second field records the original position so
    # the assertions also pin down the order of equal elements.
    def ascending(x, y):
        return cmp(x[0], y[0])

    def descending(x, y):
        return cmp(y[0], x[0])

    def first_field(pair):
        return pair[0]

    data = [(random.randrange(100), i) for i in xrange(200)]
    via_swapped_cmp = list(data)
    via_key = list(data)

    data.sort(cmp=ascending, reverse=True)
    via_swapped_cmp.sort(cmp=descending)
    self.assertEqual(data, via_swapped_cmp)

    via_key.sort(key=first_field, reverse=True)
    self.assertEqual(data, via_key)
#==============================================================================
def test_main(verbose=None):
test_classes = (
TestDecorateSortUndecorate,
TestBugs,
)
test_support.run_unittest(*test_classes)
# verify reference counting
if verbose and hasattr(sys, "gettotalrefcount"):
import gc
counts = [None] * 5
for i in xrange(len(counts)):
test_support.run_unittest(*test_classes)
gc.collect()
counts[i] = sys.gettotalrefcount()
print counts
if __name__ == "__main__":
test_main(verbose=True)
cmpNone()
if nerrors:
print "Test failed", nerrors
elif verbose:
print "Test passed -- no errors."
...@@ -12,6 +12,15 @@ What's New in Python 2.4 alpha 1? ...@@ -12,6 +12,15 @@ What's New in Python 2.4 alpha 1?
Core and builtins Core and builtins
----------------- -----------------
- list.sort() now supports three keyword arguments: cmp, key, and reverse.
The key argument can be a function of one argument that extracts a
comparison key from the original record: mylist.sort(key=str.lower).
The reverse argument is a boolean value and if True will change the
sort order as if the comparison arguments were reversed. In addition,
the documentation has been amended to guarantee that all sorts
starting with Py2.3 are stable (the relative order of
records with equal keys is unchanged).
- Added test whether wchar_t is signed or not. A signed wchar_t is not - Added test whether wchar_t is signed or not. A signed wchar_t is not
usable as internal unicode type base for Py_UNICODE since the usable as internal unicode type base for Py_UNICODE since the
unicode implementation assumes an unsigned type. unicode implementation assumes an unsigned type.
......
...@@ -1656,13 +1656,186 @@ merge_compute_minrun(int n) ...@@ -1656,13 +1656,186 @@ merge_compute_minrun(int n)
return n + r; return n + r;
} }
/* Special wrapper to support stable sorting using the decorate-sort-undecorate
pattern. Holds a key which is used for comparisons and the original record
which is returned during the undecorate phase. By exposing only the key
during comparisons, the underlying sort stability characteristics are left
unchanged. Also, if a custom comparison function is used, it will only see
the key instead of a full record. */
typedef struct {
PyObject_HEAD
PyObject *key; /* comparison key; the only field read by rich comparison */
PyObject *value; /* original record, handed back by sortwrapper_getvalue() */
} sortwrapperobject;
static PyTypeObject sortwrapper_type;
/* Rich comparison slot for sortwrapper objects.
   Delegates the comparison `op` to the wrapped keys, so the stored values
   never take part in ordering.  Returns whatever PyObject_RichCompare()
   returns for the two keys; sets TypeError and returns NULL when the
   right-hand operand is not a sortwrapper. */
static PyObject *
sortwrapper_richcompare(sortwrapperobject *a, sortwrapperobject *b, int op)
{
    if (PyObject_TypeCheck(b, &sortwrapper_type))
        return PyObject_RichCompare(a->key, b->key, op);

    PyErr_SetString(PyExc_TypeError,
                    "expected a sortwrapperobject");
    return NULL;
}
/* Destructor for sortwrapper objects: drops the wrapper's references to
   its key and value (either may be NULL) and then frees the wrapper. */
static void
sortwrapper_dealloc(sortwrapperobject *self)
{
    Py_XDECREF(self->key);
    Py_XDECREF(self->value);
    PyObject_Del(self);
}
PyDoc_STRVAR(sortwrapper_doc, "Object wrapper with a custom sort key.");
/* Type object for sortwrapper instances.  Only deallocation, generic
   attribute lookup and rich comparison are filled in; every other slot
   listed here is left empty (0). */
static PyTypeObject sortwrapper_type = {
PyObject_HEAD_INIT(&PyType_Type)
0, /* ob_size */
"sortwrapper", /* tp_name */
sizeof(sortwrapperobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)sortwrapper_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT |
Py_TPFLAGS_HAVE_RICHCOMPARE, /* tp_flags */
sortwrapper_doc, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
(richcmpfunc)sortwrapper_richcompare, /* tp_richcompare */
};
/* Returns a new reference to a sortwrapper holding `key` and `value`,
   or NULL if the wrapper cannot be allocated.
   On success the wrapper takes over the caller's references to both
   objects (no INCREF is performed here).  On failure neither reference
   is touched, so they remain the caller's responsibility. */
static PyObject *
build_sortwrapper(PyObject *key, PyObject *value)
{
    sortwrapperobject *wrapper = PyObject_New(sortwrapperobject,
                                              &sortwrapper_type);

    if (wrapper != NULL) {
        wrapper->key = key;
        wrapper->value = value;
    }
    return (PyObject *)wrapper;
}
/* Returns a new reference to the value stored in the sortwrapper `so`.
   If `so` is not a sortwrapper, sets TypeError and returns NULL. */
static PyObject *
sortwrapper_getvalue(PyObject *so)
{
    sortwrapperobject *wrapper;

    if (PyObject_TypeCheck(so, &sortwrapper_type)) {
        wrapper = (sortwrapperobject *)so;
        Py_INCREF(wrapper->value);
        return wrapper->value;
    }

    PyErr_SetString(PyExc_TypeError,
                    "expected a sortwrapperobject");
    return NULL;
}
/* Wrapper for user specified cmp functions in combination with a
specified key function. Makes sure the cmp function is presented
with the actual key instead of the sortwrapper. */
typedef struct {
PyObject_HEAD
PyObject *func; /* user-supplied comparison callable (owned reference) */
} cmpwrapperobject;
/* Destructor for cmpwrapper objects: drops the reference to the wrapped
   comparison callable (which may be NULL) and then frees the wrapper. */
static void
cmpwrapper_dealloc(cmpwrapperobject *self)
{
    Py_XDECREF(self->func);
    PyObject_Del(self);
}
/* tp_call slot for cmpwrapper objects.
   Expects exactly two positional arguments, both sortwrappers, and calls
   the wrapped comparison function with their keys, so the user's cmp
   function never sees the wrappers themselves.  `kwds` is not used.
   Returns the result of the wrapped function, or NULL with an exception
   set if the argument count is wrong, either argument is not a
   sortwrapper (TypeError), or the wrapped call fails. */
static PyObject *
cmpwrapper_call(cmpwrapperobject *co, PyObject *args, PyObject *kwds)
{
    PyObject *x, *y, *xx, *yy;

    if (!PyArg_UnpackTuple(args, "", 2, 2, &x, &y))
        return NULL;
    /* Both operands must be checked: their key fields are read below
       through an unchecked cast.  (The second test used to re-check x,
       leaving y unvalidated.) */
    if (!PyObject_TypeCheck(x, &sortwrapper_type) ||
        !PyObject_TypeCheck(y, &sortwrapper_type)) {
        PyErr_SetString(PyExc_TypeError,
                        "expected a sortwrapperobject");
        return NULL;
    }
    xx = ((sortwrapperobject *)x)->key;
    yy = ((sortwrapperobject *)y)->key;
    return PyObject_CallFunctionObjArgs(co->func, xx, yy, NULL);
}
PyDoc_STRVAR(cmpwrapper_doc, "cmp() wrapper for sort with custom keys.");
/* Type object for cmpwrapper instances.  Only deallocation, calling and
   generic attribute lookup are filled in; every other slot listed here is
   left empty (0). */
static PyTypeObject cmpwrapper_type = {
PyObject_HEAD_INIT(&PyType_Type)
0, /* ob_size */
"cmpwrapper", /* tp_name */
sizeof(cmpwrapperobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)cmpwrapper_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
(ternaryfunc)cmpwrapper_call, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
cmpwrapper_doc, /* tp_doc */
};
/* Returns a new reference to a cmpwrapper around `cmpfunc`, or NULL if
   the wrapper cannot be allocated.  The wrapper acquires its own
   reference to `cmpfunc`; the caller's reference is left untouched. */
static PyObject *
build_cmpwrapper(PyObject *cmpfunc)
{
    cmpwrapperobject *wrapper = PyObject_New(cmpwrapperobject,
                                             &cmpwrapper_type);

    if (wrapper != NULL) {
        Py_INCREF(cmpfunc);
        wrapper->func = cmpfunc;
    }
    return (PyObject *)wrapper;
}
/* An adaptive, stable, natural mergesort. See listsort.txt. /* An adaptive, stable, natural mergesort. See listsort.txt.
* Returns Py_None on success, NULL on error. Even in case of error, the * Returns Py_None on success, NULL on error. Even in case of error, the
* list will be some permutation of its input state (nothing is lost or * list will be some permutation of its input state (nothing is lost or
* duplicated). * duplicated).
*/ */
static PyObject * static PyObject *
listsort(PyListObject *self, PyObject *args) listsort(PyListObject *self, PyObject *args, PyObject *kwds)
{ {
MergeState ms; MergeState ms;
PyObject **lo, **hi; PyObject **lo, **hi;
...@@ -1673,14 +1846,48 @@ listsort(PyListObject *self, PyObject *args) ...@@ -1673,14 +1846,48 @@ listsort(PyListObject *self, PyObject *args)
PyObject **empty_ob_item; PyObject **empty_ob_item;
PyObject *compare = NULL; PyObject *compare = NULL;
PyObject *result = NULL; /* guilty until proved innocent */ PyObject *result = NULL; /* guilty until proved innocent */
int reverse = 0;
PyObject *keyfunc = NULL;
int i, n;
PyObject *key, *value, *kvpair;
static char *kwlist[] = {"cmp", "key", "reverse", 0};
assert(self != NULL); assert(self != NULL);
assert (PyList_Check(self));
if (args != NULL) { if (args != NULL) {
if (!PyArg_UnpackTuple(args, "sort", 0, 1, &compare)) if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOi:sort",
kwlist, &compare, &keyfunc, &reverse))
return NULL; return NULL;
} }
if (compare == Py_None) if (compare == Py_None)
compare = NULL; compare = NULL;
if (keyfunc == Py_None)
keyfunc = NULL;
if (compare != NULL && keyfunc != NULL) {
compare = build_cmpwrapper(compare);
if (compare == NULL)
goto dsu_fail;
} else
Py_XINCREF(compare);
if (keyfunc != NULL) {
n = PyList_GET_SIZE(self);
for (i=0 ; i<n ; i++) {
value = PyList_GET_ITEM(self, i);
key = PyObject_CallFunctionObjArgs(keyfunc, value, NULL);
if (key == NULL)
goto dsu_fail;
kvpair = build_sortwrapper(key, value);
if (kvpair == NULL)
goto dsu_fail;
PyList_SET_ITEM(self, i, kvpair);
}
}
/* Reverse sort stability achieved by initially reversing the list,
applying a stable forward sort, then reversing the final result. */
if (reverse && self->ob_size > 1)
reverse_slice(self->ob_item, self->ob_item + self->ob_size);
merge_init(&ms, compare); merge_init(&ms, compare);
...@@ -1758,6 +1965,21 @@ fail: ...@@ -1758,6 +1965,21 @@ fail:
self->ob_size = saved_ob_size; self->ob_size = saved_ob_size;
self->ob_item = saved_ob_item; self->ob_item = saved_ob_item;
merge_freemem(&ms); merge_freemem(&ms);
if (keyfunc != NULL) {
for (i=0 ; i<n ; i++) {
kvpair = PyList_GET_ITEM(self, i);
value = sortwrapper_getvalue(kvpair);
PyList_SET_ITEM(self, i, value);
Py_DECREF(kvpair);
}
}
if (reverse && self->ob_size > 1)
reverse_slice(self->ob_item, self->ob_item + self->ob_size);
dsu_fail:
Py_XDECREF(compare);
Py_XINCREF(result); Py_XINCREF(result);
return result; return result;
} }
...@@ -1771,7 +1993,7 @@ PyList_Sort(PyObject *v) ...@@ -1771,7 +1993,7 @@ PyList_Sort(PyObject *v)
PyErr_BadInternalCall(); PyErr_BadInternalCall();
return -1; return -1;
} }
v = listsort((PyListObject *)v, (PyObject *)NULL); v = listsort((PyListObject *)v, (PyObject *)NULL, (PyObject *)NULL);
if (v == NULL) if (v == NULL)
return -1; return -1;
Py_DECREF(v); Py_DECREF(v);
...@@ -2111,7 +2333,8 @@ PyDoc_STRVAR(count_doc, ...@@ -2111,7 +2333,8 @@ PyDoc_STRVAR(count_doc,
PyDoc_STRVAR(reverse_doc, PyDoc_STRVAR(reverse_doc,
"L.reverse() -- reverse *IN PLACE*"); "L.reverse() -- reverse *IN PLACE*");
PyDoc_STRVAR(sort_doc, PyDoc_STRVAR(sort_doc,
"L.sort(cmpfunc=None) -- stable sort *IN PLACE*; cmpfunc(x, y) -> -1, 0, 1"); "L.sort(cmp=None, key=None, reverse=False) -- stable sort *IN PLACE*;\n\
cmp(x, y) -> -1, 0, 1");
static PyMethodDef list_methods[] = { static PyMethodDef list_methods[] = {
{"append", (PyCFunction)listappend, METH_O, append_doc}, {"append", (PyCFunction)listappend, METH_O, append_doc},
...@@ -2122,7 +2345,7 @@ static PyMethodDef list_methods[] = { ...@@ -2122,7 +2345,7 @@ static PyMethodDef list_methods[] = {
{"index", (PyCFunction)listindex, METH_VARARGS, index_doc}, {"index", (PyCFunction)listindex, METH_VARARGS, index_doc},
{"count", (PyCFunction)listcount, METH_O, count_doc}, {"count", (PyCFunction)listcount, METH_O, count_doc},
{"reverse", (PyCFunction)listreverse, METH_NOARGS, reverse_doc}, {"reverse", (PyCFunction)listreverse, METH_NOARGS, reverse_doc},
{"sort", (PyCFunction)listsort, METH_VARARGS, sort_doc}, {"sort", (PyCFunction)listsort, METH_VARARGS | METH_KEYWORDS, sort_doc},
{NULL, NULL} /* sentinel */ {NULL, NULL} /* sentinel */
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment