Commit 9be62833 authored by Guido van Rossum

Tim's quicksort on May 25.

parent 16653cb2
...@@ -625,46 +625,11 @@ docompare(x, y, compare) ...@@ -625,46 +625,11 @@ docompare(x, y, compare)
} }
/* MINSIZE is the smallest array we care to partition; smaller arrays /* MINSIZE is the smallest array we care to partition; smaller arrays
are sorted using a straight insertion sort (above). It must be at are sorted using binary insertion. It must be at least 4 for the
least 3 for the quicksort implementation to work. Assuming that quicksort implementation to work. Binary insertion always requires
comparisons are more expensive than everything else (and this is a fewer compares than quicksort, but does O(N**2) data movement. The
good assumption for Python), it should be 10, which is the cutoff more expensive compares, the larger MINSIZE should be. */
point: quicksort requires more comparisons than insertion sort for #define MINSIZE 49
smaller arrays. */
#define MINSIZE 10
/* Straight insertion sort.  More efficient for sorting small arrays.

   Sorts the `size` elements starting at `array` in place, ordering them
   with docompare(x, y, compare).  Returns 0 on success, or -1 as soon as
   docompare() reports CMPERROR; in that case the array is left partly
   sorted, but every element is still present exactly once because the
   key is written back after each single-slot shift. */
static int
insertionsort(array, size, compare)
	PyObject **array; /* Start of array to sort */
	int size; /* Number of elements to sort */
	PyObject *compare;/* Comparison function object, or NULL => default */
{
	register PyObject **a = array;
	register PyObject **end;
	register PyObject **p;

	/* Zero or one element is already sorted.  Returning here also
	   avoids computing a+1 for an empty array. */
	if (size < 2)
		return 0;
	end = a + size;

	for (p = a+1; p < end; p++) {
		register PyObject *key = *p;
		register PyObject **q = p; /* Slot currently holding key */

		/* Walk key towards the front while it compares less than
		   its left neighbour.  Testing q > a before looking at
		   *(q-1) means no pointer below the start of the array is
		   ever formed. */
		while (q > a) {
			register int k = docompare(key, *(q-1), compare);
			if (k == CMPERROR)
				return -1;
			if (k >= 0)
				break;
			*q = *(q-1);
			--q;
			*q = key; /* For consistency */
		}
	}
	return 0;
}
/* STACKSIZE is the size of our work stack. A rough estimate is that /* STACKSIZE is the size of our work stack. A rough estimate is that
this allows us to sort arrays of MINSIZE * 2**STACKSIZE, or large this allows us to sort arrays of MINSIZE * 2**STACKSIZE, or large
...@@ -673,7 +638,7 @@ insertionsort(array, size, compare) ...@@ -673,7 +638,7 @@ insertionsort(array, size, compare)
exactly in two.) */ exactly in two.) */
#define STACKSIZE 64 #define STACKSIZE 64
/* Quicksort algorithm. Return -1 if an exception occurred; in this /* quicksort algorithm. Return -1 if an exception occurred; in this
case we leave the array partly sorted but otherwise in good health case we leave the array partly sorted but otherwise in good health
(i.e. no items have been removed or duplicated). */ (i.e. no items have been removed or duplicated). */
...@@ -685,8 +650,8 @@ quicksort(array, size, compare) ...@@ -685,8 +650,8 @@ quicksort(array, size, compare)
{ {
register PyObject *tmp, *pivot; register PyObject *tmp, *pivot;
register PyObject **l, **r, **p; register PyObject **l, **r, **p;
register PyObject **lo, **hi; PyObject **lo, **hi, **notp;
int top, k, n; int top, k, n, lisp, risp;
PyObject **lostack[STACKSIZE]; PyObject **lostack[STACKSIZE];
PyObject **histack[STACKSIZE]; PyObject **histack[STACKSIZE];
...@@ -699,55 +664,66 @@ quicksort(array, size, compare) ...@@ -699,55 +664,66 @@ quicksort(array, size, compare)
while (--top >= 0) { while (--top >= 0) {
lo = lostack[top]; lo = lostack[top];
hi = histack[top]; hi = histack[top];
/* If it's a small one, use straight insertion sort */
n = hi - lo; n = hi - lo;
if (n < MINSIZE)
/* If it's a small one, use binary insertion sort */
if (n < MINSIZE) {
for (notp = lo+1; notp < hi; ++notp) {
/* set l to where *notp belongs */
l = lo;
r = notp;
pivot = *r;
do {
p = l + ((r - l) >> 1);
k = docompare(pivot, *p, compare);
if (k == CMPERROR)
return -1;
if (k < 0)
r = p;
else
l = p + 1;
} while (l < r);
/* Pivot should go at l -- slide over to
make room. Caution: using memmove
is much slower under MSVC 5; we're
not usually moving many slots. */
for (p = notp; p > l; --p)
*p = *(p-1);
*l = pivot;
}
continue; continue;
}
/* Choose median of first, middle and last as pivot; /* Choose median of first, middle and last as pivot */
these 3 are reverse-sorted in the process; the ends
will be swapped on the first do-loop iteration.
*/
l = lo; /* First */ l = lo; /* First */
p = lo + (n>>1); /* Middle */ p = lo + (n>>1); /* Middle */
r = hi - 1; /* Last */ r = hi - 1; /* Last */
k = docompare(*l, *p, compare); k = docompare(*p, *l, compare);
if (k == CMPERROR) if (k == CMPERROR)
return -1; return -1;
if (k < 0) if (k < 0)
{ tmp = *l; *l = *p; *p = tmp; } { tmp = *p; *p = *l; *l = tmp; }
k = docompare(*p, *r, compare); k = docompare(*r, *p, compare);
if (k == CMPERROR) if (k == CMPERROR)
return -1; return -1;
if (k < 0) if (k < 0)
{ tmp = *p; *p = *r; *r = tmp; } { tmp = *r; *r = *p; *p = tmp; }
k = docompare(*l, *p, compare); k = docompare(*p, *l, compare);
if (k == CMPERROR) if (k == CMPERROR)
return -1; return -1;
if (k < 0) if (k < 0)
{ tmp = *l; *l = *p; *p = tmp; } { tmp = *p; *p = *l; *l = tmp; }
pivot = *p; pivot = *p;
/* Partition the array */
do {
tmp = *l; *l = *r; *r = tmp;
if (l == p) {
p = r;
l++; l++;
}
else if (r == p) {
p = l;
r--; r--;
}
else { /* Partition the array */
l++; for (;;) {
r--; lisp = risp = 1; /* presumed guilty */
}
/* Move left index to element >= pivot */ /* Move left index to element >= pivot */
while (l < p) { while (l < p) {
...@@ -756,9 +732,11 @@ quicksort(array, size, compare) ...@@ -756,9 +732,11 @@ quicksort(array, size, compare)
return -1; return -1;
if (k < 0) if (k < 0)
l++; l++;
else else {
lisp = 0;
break; break;
} }
}
/* Move right index to element <= pivot */ /* Move right index to element <= pivot */
while (r > p) { while (r > p) {
k = docompare(pivot, *r, compare); k = docompare(pivot, *r, compare);
...@@ -766,27 +744,76 @@ quicksort(array, size, compare) ...@@ -766,27 +744,76 @@ quicksort(array, size, compare)
return -1; return -1;
if (k < 0) if (k < 0)
r--; r--;
else else {
risp = 0;
break; break;
} }
}
} while (l < r); if (lisp == risp) {
/* assert l < p < r or l == p == r
* This is the most common case, so we
* strive to get back to the top of the
* loop ASAP.
*/
tmp = *l; *l = *r; *r = tmp;
l++; r--;
if (l < r)
continue;
break;
}
/* One (exactly) of the pointers is at p */
/* assert (p == l) ^ (p == r) */
notp = lisp ? r : l;
k = (r - l) >> 1;
if (k) {
*p = *notp;
if (lisp) {
p = r - k;
l++;
}
else {
p = l + k;
r--;
}
/* assert l < p < r */
*notp = *p;
*p = pivot; /* for consistency */
continue;
}
/* lo < l == p == r < hi-1 /* assert l+1 == r */
*p == pivot *p = *notp;
*notp = pivot;
p = notp;
break;
} /* end of partitioning loop */
/* assert *p == pivot
All in [lo,p) are <= pivot All in [lo,p) are <= pivot
At p == pivot At p == pivot
All in [p+1,hi) are >= pivot All in [p+1,hi) are >= pivot
*/
Now extend as far as possible (around p) so that: r = p;
l = p + 1;
/* Partitions are [lo,r) and [l,hi).
* See whether *l == pivot; we know *l >= pivot, so
* they're equal iff *l <= pivot too, or not pivot < *l.
* This wastes a compare if it fails, but can win big
* when there are runs of duplicates.
*/
k = docompare(pivot, *l, compare);
if (k == CMPERROR)
return -1;
if (!(k < 0)) {
/* Now extend as far as possible (around p) so that:
All in [lo,r) are <= pivot All in [lo,r) are <= pivot
All in [r,l) are == pivot All in [r,l) are == pivot
All in [l,hi) are >= pivot All in [l,hi) are >= pivot
This wastes two compares if no elements are == to the Mildly tricky: continue using only "<" -- we
pivot, but can win big when there are duplicates. deduce equality indirectly.
Mildly tricky: continue using only "<" -- we deduce
equality indirectly.
*/ */
while (r > lo) { while (r > lo) {
/* because r-1 < p, *(r-1) <= pivot is known */ /* because r-1 < p, *(r-1) <= pivot is known */
...@@ -811,6 +838,8 @@ quicksort(array, size, compare) ...@@ -811,6 +838,8 @@ quicksort(array, size, compare)
l++; l++;
} }
} /* end of checking for duplicates */
/* Push biggest partition first */ /* Push biggest partition first */
if (r - lo >= hi - l) { if (r - lo >= hi - l) {
/* First one is bigger */ /* First one is bigger */
...@@ -828,17 +857,6 @@ quicksort(array, size, compare) ...@@ -828,17 +857,6 @@ quicksort(array, size, compare)
/* Should assert top <= STACKSIZE */ /* Should assert top <= STACKSIZE */
} }
/*
* Ouch - even if I screwed up the quicksort above, the
* insertionsort below will cover up the problem - just a
* performance hit would be noticeable.
*/
/* insertionsort is pretty fast on the partially sorted list */
if (insertionsort(array, size, compare) < 0)
return -1;
/* Success */ /* Success */
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment