Commit 9ef4b981 authored by Raymond Hettinger

Misc improvements to collections.deque()

* Clarified comment on the impact of BLOCKLEN on deque_index
  (with a power-of-two, the division and modulo
   computations are done with a right-shift and bitwise-and).

* Clarified comment on the overflow check to note that
  it is general and not just applicable to the 64-bit builds.

* In deque._rotate(), the "deque->" indirections are
  factored out of the loop (loop-invariant code motion),
  leaving the code cleaner looking and slightly faster.

* In deque._rotate(), replaced the memcpy() with an
  equivalent loop.  That saved the memcpy setup time
  and allowed the pointers to move in their natural
  leftward and rightward directions.

See comparative timings at:  http://pastebin.com/p0RJnT5N
parent e7b9abac
...@@ -8,9 +8,10 @@ ...@@ -8,9 +8,10 @@
*/ */
/* The block length may be set to any number over 1. Larger numbers /* The block length may be set to any number over 1. Larger numbers
* reduce the number of calls to the memory allocator but take more * reduce the number of calls to the memory allocator, give faster
* memory. Ideally, (BLOCKLEN+2) should be set to a multiple of the * indexing and rotation, and reduce the link::data overhead ratio.
* length of a cache line. * If the block length is a power-of-two, we also get faster
* division/modulo computations during indexing.
*/ */
#define BLOCKLEN 62 #define BLOCKLEN 62
...@@ -47,8 +48,8 @@ ...@@ -47,8 +48,8 @@
typedef struct BLOCK { typedef struct BLOCK {
struct BLOCK *leftlink; struct BLOCK *leftlink;
struct BLOCK *rightlink;
PyObject *data[BLOCKLEN]; PyObject *data[BLOCKLEN];
struct BLOCK *rightlink;
} block; } block;
#define MAXFREEBLOCKS 10 #define MAXFREEBLOCKS 10
...@@ -58,13 +59,8 @@ static block *freeblocks[MAXFREEBLOCKS]; ...@@ -58,13 +59,8 @@ static block *freeblocks[MAXFREEBLOCKS];
static block * static block *
newblock(block *leftlink, block *rightlink, Py_ssize_t len) { newblock(block *leftlink, block *rightlink, Py_ssize_t len) {
block *b; block *b;
/* To prevent len from overflowing PY_SSIZE_T_MAX on 64-bit machines, we /* To prevent len from overflowing PY_SSIZE_T_MAX, we refuse to
* refuse to allocate new blocks if the current len is dangerously * allocate new blocks if the current len is nearing overflow. */
* close. There is some extra margin to prevent spurious arithmetic
* overflows at various places. The following check ensures that
* the blocks allocated to the deque, in the worst case, can only
* have PY_SSIZE_T_MAX-2 entries in total.
*/
if (len >= PY_SSIZE_T_MAX - 2*BLOCKLEN) { if (len >= PY_SSIZE_T_MAX - 2*BLOCKLEN) {
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"cannot add more blocks to the deque"); "cannot add more blocks to the deque");
...@@ -413,7 +409,12 @@ deque_inplace_concat(dequeobject *deque, PyObject *other) ...@@ -413,7 +409,12 @@ deque_inplace_concat(dequeobject *deque, PyObject *other)
static int static int
_deque_rotate(dequeobject *deque, Py_ssize_t n) _deque_rotate(dequeobject *deque, Py_ssize_t n)
{ {
Py_ssize_t m, len=deque->len, halflen=len>>1; block *leftblock = deque->leftblock;
block *rightblock = deque->rightblock;
Py_ssize_t leftindex = deque->leftindex;
Py_ssize_t rightindex = deque->rightindex;
Py_ssize_t len=deque->len, halflen=len>>1;
int rv = 0;
if (len <= 1) if (len <= 1)
return 0; return 0;
...@@ -429,76 +430,96 @@ _deque_rotate(dequeobject *deque, Py_ssize_t n) ...@@ -429,76 +430,96 @@ _deque_rotate(dequeobject *deque, Py_ssize_t n)
deque->state++; deque->state++;
while (n > 0) { while (n > 0) {
if (deque->leftindex == 0) { if (leftindex == 0) {
block *b = newblock(NULL, deque->leftblock, len); block *b = newblock(NULL, leftblock, len);
if (b == NULL) if (b == NULL) {
return -1; rv = -1;
assert(deque->leftblock->leftlink == NULL); goto done;
deque->leftblock->leftlink = b; }
deque->leftblock = b; assert(leftblock->leftlink == NULL);
deque->leftindex = BLOCKLEN; leftblock->leftlink = b;
leftblock = b;
leftindex = BLOCKLEN;
} }
assert(deque->leftindex > 0); assert(leftindex > 0);
m = n; {
if (m > deque->rightindex + 1) PyObject **src, **dest;
m = deque->rightindex + 1; Py_ssize_t m = n;
if (m > deque->leftindex)
m = deque->leftindex; if (m > rightindex + 1)
assert (m > 0 && m <= len); m = rightindex + 1;
memcpy(&deque->leftblock->data[deque->leftindex - m], if (m > leftindex)
&deque->rightblock->data[deque->rightindex + 1 - m], m = leftindex;
m * sizeof(PyObject *)); assert (m > 0 && m <= len);
deque->rightindex -= m; src = &rightblock->data[rightindex];
deque->leftindex -= m; dest = &leftblock->data[leftindex - 1];
n -= m; rightindex -= m;
leftindex -= m;
if (deque->rightindex == -1) { n -= m;
block *prevblock = deque->rightblock->leftlink; while (m--)
assert(deque->rightblock != NULL); *(dest--) = *(src--);
assert(deque->leftblock != deque->rightblock); }
freeblock(deque->rightblock);
if (rightindex == -1) {
block *prevblock = rightblock->leftlink;
assert(rightblock != NULL);
assert(leftblock != rightblock);
freeblock(rightblock);
prevblock->rightlink = NULL; prevblock->rightlink = NULL;
deque->rightblock = prevblock; rightblock = prevblock;
deque->rightindex = BLOCKLEN - 1; rightindex = BLOCKLEN - 1;
} }
} }
while (n < 0) { while (n < 0) {
if (deque->rightindex == BLOCKLEN - 1) { if (rightindex == BLOCKLEN - 1) {
block *b = newblock(deque->rightblock, NULL, len); block *b = newblock(rightblock, NULL, len);
if (b == NULL) if (b == NULL) {
return -1; rv = -1;
assert(deque->rightblock->rightlink == NULL); goto done;
deque->rightblock->rightlink = b; }
deque->rightblock = b; assert(rightblock->rightlink == NULL);
deque->rightindex = -1; rightblock->rightlink = b;
rightblock = b;
rightindex = -1;
} }
assert (deque->rightindex < BLOCKLEN - 1); assert (rightindex < BLOCKLEN - 1);
m = -n; {
if (m > BLOCKLEN - deque->leftindex) PyObject **src, **dest;
m = BLOCKLEN - deque->leftindex; Py_ssize_t m = -n;
if (m > BLOCKLEN - 1 - deque->rightindex)
m = BLOCKLEN - 1 - deque->rightindex; if (m > BLOCKLEN - leftindex)
assert (m > 0 && m <= len); m = BLOCKLEN - leftindex;
memcpy(&deque->rightblock->data[deque->rightindex + 1], if (m > BLOCKLEN - 1 - rightindex)
&deque->leftblock->data[deque->leftindex], m = BLOCKLEN - 1 - rightindex;
m * sizeof(PyObject *)); assert (m > 0 && m <= len);
deque->leftindex += m; src = &leftblock->data[leftindex];
deque->rightindex += m; dest = &rightblock->data[rightindex + 1];
n += m; leftindex += m;
rightindex += m;
if (deque->leftindex == BLOCKLEN) { n += m;
block *nextblock = deque->leftblock->rightlink; while (m--)
assert(deque->leftblock != deque->rightblock); *(dest++) = *(src++);
freeblock(deque->leftblock); }
if (leftindex == BLOCKLEN) {
block *nextblock = leftblock->rightlink;
assert(leftblock != rightblock);
freeblock(leftblock);
assert(nextblock != NULL); assert(nextblock != NULL);
nextblock->leftlink = NULL; nextblock->leftlink = NULL;
deque->leftblock = nextblock; leftblock = nextblock;
deque->leftindex = 0; leftindex = 0;
} }
} }
return 0; done:
deque->leftblock = leftblock;
deque->rightblock = rightblock;
deque->leftindex = leftindex;
deque->rightindex = rightindex;
return rv;
} }
static PyObject * static PyObject *
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment