Commit b7057640 authored by Guido van Rossum's avatar Guido van Rossum

Tim's quicksort on May 10.

parent 01fc65d9
...@@ -624,6 +624,15 @@ docompare(x, y, compare) ...@@ -624,6 +624,15 @@ docompare(x, y, compare)
return 0; return 0;
} }
/* MINSIZE is the smallest array we care to partition; smaller arrays
are sorted using a straight insertion sort (above). It must be at
least 3 for the quicksort implementation to work. Assuming that
comparisons are more expensive than everything else (and this is a
good assumption for Python), it should be 10, which is the cutoff
point: quicksort requires more comparisons than insertion sort for
smaller arrays. */
#define MINSIZE 12
/* Straight insertion sort. More efficient for sorting small arrays. */ /* Straight insertion sort. More efficient for sorting small arrays. */
static int static int
...@@ -640,30 +649,23 @@ insertionsort(array, size, compare) ...@@ -640,30 +649,23 @@ insertionsort(array, size, compare)
register PyObject *key = *p; register PyObject *key = *p;
register PyObject **q = p; register PyObject **q = p;
while (--q >= a) { while (--q >= a) {
register int k = docompare(*q, key, compare); register int k = docompare(key, *q, compare);
/* if (p-q >= MINSIZE) /* if (p-q >= MINSIZE)
fprintf(stderr, "OUCH! %d\n", p-q); */ fprintf(stderr, "OUCH! %d\n", p-q); */
if (k == CMPERROR) if (k == CMPERROR)
return -1; return -1;
if (k <= 0) if (k < 0) {
*(q+1) = *q;
*q = key; /* For consistency */
}
else
break; break;
*(q+1) = *q;
*q = key; /* For consistency */
} }
} }
return 0; return 0;
} }
/* MINSIZE is the smallest array we care to partition; smaller arrays
are sorted using a straight insertion sort (above). It must be at
least 2 for the quicksort implementation to work. Assuming that
comparisons are more expensive than everything else (and this is a
good assumption for Python), it should be 10, which is the cutoff
point: quicksort requires more comparisons than insertion sort for
smaller arrays. */
#define MINSIZE 10
/* STACKSIZE is the size of our work stack. A rough estimate is that /* STACKSIZE is the size of our work stack. A rough estimate is that
this allows us to sort arrays of MINSIZE * 2**STACKSIZE, or large this allows us to sort arrays of MINSIZE * 2**STACKSIZE, or large
enough. (Because of the way we push the biggest partition first, enough. (Because of the way we push the biggest partition first,
...@@ -682,8 +684,9 @@ quicksort(array, size, compare) ...@@ -682,8 +684,9 @@ quicksort(array, size, compare)
PyObject *compare;/* Comparison function object, or NULL for default */ PyObject *compare;/* Comparison function object, or NULL for default */
{ {
register PyObject *tmp, *pivot; register PyObject *tmp, *pivot;
register PyObject **lo, **hi, **l, **r; register PyObject **l, **r, **p;
int top, k, n, n2; register PyObject **lo, **hi;
int top, k, n;
PyObject **lostack[STACKSIZE]; PyObject **lostack[STACKSIZE];
PyObject **histack[STACKSIZE]; PyObject **histack[STACKSIZE];
...@@ -699,88 +702,117 @@ quicksort(array, size, compare) ...@@ -699,88 +702,117 @@ quicksort(array, size, compare)
/* If it's a small one, use straight insertion sort */ /* If it's a small one, use straight insertion sort */
n = hi - lo; n = hi - lo;
if (n < MINSIZE) { if (n < MINSIZE)
/*
* skip it. The insertion sort at the end will
* catch these
*/
continue; continue;
}
/* Choose median of first, middle and last item as pivot */ /* Choose median of first, middle and last as pivot;
these 3 are reverse-sorted in the process; the ends
l = lo + (n>>1); /* Middle */ will be swapped on the first do-loop iteration.
r = hi - 1; /* Last */ */
l = lo; /* First */
p = lo + (n>>1); /* Middle */
r = hi - 1; /* Last */
k = docompare(*l, *lo, compare); k = docompare(*l, *p, compare);
if (k == CMPERROR) if (k == CMPERROR)
return -1; return -1;
if (k < 0) if (k < 0)
{ tmp = *lo; *lo = *l; *l = tmp; } { tmp = *l; *l = *p; *p = tmp; }
k = docompare(*r, *l, compare); k = docompare(*p, *r, compare);
if (k == CMPERROR) if (k == CMPERROR)
return -1; return -1;
if (k < 0) if (k < 0)
{ tmp = *r; *r = *l; *l = tmp; } { tmp = *p; *p = *r; *r = tmp; }
k = docompare(*l, *lo, compare); k = docompare(*l, *p, compare);
if (k == CMPERROR) if (k == CMPERROR)
return -1; return -1;
if (k < 0) if (k < 0)
{ tmp = *l; *l = *lo; *lo = tmp; } { tmp = *l; *l = *p; *p = tmp; }
pivot = *l;
/* Move pivot off to the side (swap with lo+1) */ pivot = *p;
*l = *(lo+1); *(lo+1) = pivot;
/* Partition the array */ /* Partition the array */
l = lo+2;
r = hi-2;
do { do {
tmp = *l; *l = *r; *r = tmp;
if (l == p) {
p = r;
l++;
}
else if (r == p) {
p = l;
r--;
}
else {
l++;
r--;
}
/* Move left index to element >= pivot */ /* Move left index to element >= pivot */
while (l < hi) { while (l < p) {
k = docompare(*l, pivot, compare); k = docompare(*l, pivot, compare);
if (k == CMPERROR) if (k == CMPERROR)
return -1; return -1;
if (k >= 0) if (k < 0)
l++;
else
break; break;
l++;
} }
/* Move right index to element <= pivot */ /* Move right index to element <= pivot */
while (r > lo) { while (r > p) {
k = docompare(pivot, *r, compare); k = docompare(pivot, *r, compare);
if (k == CMPERROR) if (k == CMPERROR)
return -1; return -1;
if (k >= 0) if (k < 0)
r--;
else
break; break;
r--;
}
/* If they crossed, we're through */
if (l <= r) {
/* Swap elements and continue */
tmp = *l; *l = *r; *r = tmp;
l++; r--;
} }
} while (l <= r); } while (l < r);
/* Swap pivot back into place; *r <= pivot */ /* lo < l == p == r < hi-1
*(lo+1) = *r; *r = pivot; *p == pivot
All in [lo,p) are <= pivot
At p == pivot
All in [p+1,hi) are >= pivot
Now extend as far as possible (around p) so that:
All in [lo,r) are <= pivot
All in [r,l) are == pivot
All in [l,hi) are >= pivot
This wastes two compares if no elements are == to the
pivot, but can win big when there are duplicates.
Mildly tricky: continue using only "<" -- we deduce
equality indirectly.
*/
while (r > lo) {
/* because r-1 < p, *(r-1) <= pivot is known */
k = docompare(*(r-1), pivot, compare);
if (k == CMPERROR)
return -1;
if (k < 0)
break;
/* <= and not < implies == */
r--;
}
/* We have now reached the following conditions: l++;
lo <= r < l <= hi while (l < hi) {
all x in [lo,r) are <= pivot /* because l > p, pivot <= *l is known */
all x in [r,l) are == pivot k = docompare(pivot, *l, compare);
all x in [l,hi) are >= pivot if (k == CMPERROR)
The partitions are [lo,r) and [l,hi) return -1;
*/ if (k < 0)
break;
/* <= and not < implies == */
l++;
}
/* Push biggest partition first */ /* Push biggest partition first */
n = r - lo; if (r - lo >= hi - l) {
n2 = hi - l;
if (n > n2) {
/* First one is bigger */ /* First one is bigger */
lostack[top] = lo; lostack[top] = lo;
histack[top++] = r; histack[top++] = r;
...@@ -793,22 +825,21 @@ quicksort(array, size, compare) ...@@ -793,22 +825,21 @@ quicksort(array, size, compare)
lostack[top] = lo; lostack[top] = lo;
histack[top++] = r; histack[top++] = r;
} }
/* Should assert top <= STACKSIZE */ /* Should assert top <= STACKSIZE */
} }
/* /*
* Ouch - even if I screwed up the quicksort above, the * Ouch - even if I screwed up the quicksort above, the
* insertionsort below will cover up the problem - just a * insertionsort below will cover up the problem - just a
* performance hit would be noticable. * performance hit would be noticable.
*/ */
/* insertionsort is pretty fast on the partially sorted list */ /* insertionsort is pretty fast on the partially sorted list */
if (insertionsort(array, size, compare) < 0) if (insertionsort(array, size, compare) < 0)
return -1; return -1;
/* Succes */ /* Success */
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment