Commit 9c97b786 authored by Guido van Rossum

Changed the lookup algorithm again, based on Reimer Behrends's post.

The table size is now constrained to be a power of two, and we use a
variable increment based on GF(2^n)-{0} (not that I have the faintest
idea what that is :-) which helps avoid the expensive '%' operation.

Some of the entries in the table of polynomials have been modified
according to a post by Tim Peters.
parent fb5911dc
...@@ -42,21 +42,47 @@ PERFORMANCE OF THIS SOFTWARE. ...@@ -42,21 +42,47 @@ PERFORMANCE OF THIS SOFTWARE.
/* /*
Table of primes suitable as keys, in ascending order. * MINSIZE is the minimum size of a mapping.
The first line are the largest primes less than some powers of two, */
the second line is the largest prime less than 6000,
the third line is a selection from Knuth, Vol. 3, Sec. 6.1, Table 1, #define MINSIZE 4
and the next three lines were suggested by Steve Kirsch.
The final value is a sentinel. /*
Table of irreducible polynomials to efficiently cycle through
GF(2^n)-{0}, 2<=n<=30.
*/ */
static long primes[] = { static long polys[] = {
3, 7, 13, 31, 61, 127, 251, 509, 1021, 2017, 4093, 4 + 3,
5987, 8 + 3,
9551, 15683, 19609, 31397, 16 + 3,
65521L, 131071L, 262139L, 524287L, 1048573L, 2097143L, 32 + 5,
4194301L, 8388593L, 16777213L, 33554393L, 67108859L, 64 + 3,
134217689L, 268435399L, 536870909L, 1073741789L, 128 + 3,
0 256 + 29,
512 + 17,
1024 + 9,
2048 + 5,
4096 + 83,
8192 + 27,
16384 + 43,
32768 + 3,
65536 + 45,
131072 + 9,
262144 + 39,
524288 + 39,
1048576 + 9,
2097152 + 5,
4194304 + 3,
8388608 + 33,
16777216 + 27,
33554432 + 9,
67108864 + 71,
134217728 + 39,
/* Not verified by Tim P: */
268435456 + 3,
536870912 + 5,
1073741824 + 3,
0
}; };
/* Object used as dummy key to fill deleted entries */ /* Object used as dummy key to fill deleted entries */
...@@ -87,6 +113,7 @@ typedef struct { ...@@ -87,6 +113,7 @@ typedef struct {
int ma_fill; int ma_fill;
int ma_used; int ma_used;
int ma_size; int ma_size;
int ma_poly;
mappingentry *ma_table; mappingentry *ma_table;
} mappingobject; } mappingobject;
...@@ -103,6 +130,7 @@ newmappingobject() ...@@ -103,6 +130,7 @@ newmappingobject()
if (mp == NULL) if (mp == NULL)
return NULL; return NULL;
mp->ma_size = 0; mp->ma_size = 0;
mp->ma_poly = 0;
mp->ma_table = NULL; mp->ma_table = NULL;
mp->ma_fill = 0; mp->ma_fill = 0;
mp->ma_used = 0; mp->ma_used = 0;
...@@ -111,9 +139,12 @@ newmappingobject() ...@@ -111,9 +139,12 @@ newmappingobject()
/* /*
The basic lookup function used by all operations. The basic lookup function used by all operations.
This is essentially Algorithm D from Knuth Vol. 3, Sec. 6.4. This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
Open addressing is preferred over chaining since the link overhead for Open addressing is preferred over chaining since the link overhead for
chaining would be substantial (100% with typical malloc overhead). chaining would be substantial (100% with typical malloc overhead).
However, instead of going through the table at constant steps, we cycle
through the values of GF(2^n)-{0}. This avoids modulo computations, being
much cheaper on RISC machines, without leading to clustering.
First a 32-bit hash value, 'sum', is computed from the key string. First a 32-bit hash value, 'sum', is computed from the key string.
The first character is added an extra time shifted by 8 to avoid hashing The first character is added an extra time shifted by 8 to avoid hashing
...@@ -121,10 +152,11 @@ single-character keys (often heavily used variables) too close together. ...@@ -121,10 +152,11 @@ single-character keys (often heavily used variables) too close together.
All arithmetic on sum should ignore overflow. All arithmetic on sum should ignore overflow.
The initial probe index is then computed as sum mod the table size. The initial probe index is then computed as sum mod the table size.
Subsequent probe indices are incr apart (mod table size), where incr Subsequent probe indices use the values of x^i in GF(2^n) as an offset,
is also derived from sum, with the additional requirement that it is where x is a root. The initial value is derived from sum, too.
relative prime to the table size (i.e., 1 <= incr < size, since the size
is a prime number). My choice for incr is somewhat arbitrary. (This version is due to Reimer Behrends, some ideas are also due to
Jyrki Alakuijala.)
*/ */
static mappingentry *lookmapping PROTO((mappingobject *, object *, long)); static mappingentry *lookmapping PROTO((mappingobject *, object *, long));
static mappingentry * static mappingentry *
...@@ -133,97 +165,56 @@ lookmapping(mp, key, hash) ...@@ -133,97 +165,56 @@ lookmapping(mp, key, hash)
object *key; object *key;
long hash; long hash;
{ {
/* Optimizations based on observations by Jyrki Alakuijala register int i;
(paraphrased): register unsigned incr;
register unsigned long sum = (unsigned long) hash;
- This routine is very heavily used, so should be AFAP register mappingentry *freeslot = NULL;
(As Fast As Possible). register int mask = mp->ma_size-1;
register mappingentry *ep = &mp->ma_table[i];
- Most of the time, the first try is a hit or a definite /* We must come up with (i, incr) such that 0 <= i < ma_size
miss; so postpone the calculation of incr until we know the and 0 < incr < ma_size and both are a function of hash */
first try was a miss. i = (~sum) & mask;
 /* We use ~sum instead of sum, as degenerate hash functions, such
- Write the loop twice, so we can move the test for as for ints <sigh>, can have lots of leading zeros. It's not
freeslot==NULL out of the loop. really a performance risk, but better safe than sorry. */
ep = &mp->ma_table[i];
- Write the loop using pointer increments and comparisons if (ep->me_key == NULL)
rather than using an integer loop index.
Note that it behooves the compiler to calculate the values
of incr*sizeof(*ep) outside the loops and use this in the
increment of ep. I've reduced the number of register
variables to the two most obvious candidates.
*/
register mappingentry *ep;
mappingentry *end;
register object *ekey;
mappingentry *freeslot;
unsigned long sum;
int incr;
int size;
ep = &mp->ma_table[(unsigned long)hash%mp->ma_size];
ekey = ep->me_key;
if (ekey == NULL)
return ep; return ep;
#ifdef INTERN_STRINGS if (ep->me_key == dummy)
{
object *ikey;
if (is_stringobject(key) &&
(ikey = ((stringobject *)key)->ob_sinterned) != NULL)
key = ikey;
}
#endif
if (ekey == dummy)
freeslot = ep; freeslot = ep;
else { else if (ep->me_key == key ||
if (ekey == key) (ep->me_hash == hash && cmpobject(ep->me_key, key) == 0)) {
return ep; return ep;
if (ep->me_hash == hash && cmpobject(ekey, key) == 0)
return ep;
freeslot = NULL;
} }
/* Derive incr from i, just to make it more arbitrary. Note that
size = mp->ma_size; incr must not be 0, or we will get into an infinite loop.*/
sum = hash; incr = i << 1;
do { if (!incr)
sum += sum + sum + 1; incr = mask;
incr = sum % size; if (incr > mask) /* Cycle through GF(2^n)-{0} */
} while (incr == 0); incr ^= mp->ma_poly; /* This will implicitly clear the
highest bit */
end = mp->ma_table + size; for (;;) {
ep = &mp->ma_table[(i+incr)&mask];
if (freeslot == NULL) { if (ep->me_key == NULL) {
for (;;) { if (freeslot != NULL)
ep += incr; return freeslot;
if (ep >= end) else
ep -= size;
ekey = ep->me_key;
if (ekey == NULL)
return ep; return ep;
if (ekey == dummy) { }
if (ep->me_key == dummy) {
if (freeslot == NULL)
freeslot = ep; freeslot = ep;
break;
}
if (ekey == key || (ep->me_hash == hash &&
cmpobject(ekey, key) == 0))
return ep;
} }
} else if (ep->me_key == key ||
(ep->me_hash == hash &&
for (;;) { cmpobject(ep->me_key, key) == 0)) {
ep += incr;
if (ep >= end)
ep -= size;
ekey = ep->me_key;
if (ekey == NULL)
return freeslot;
if (ekey == key ||
(ekey != dummy &&
ep->me_hash == hash && cmpobject(ekey, key) == 0))
return ep; return ep;
}
/* Cycle through GF(2^n)-{0} */
incr = incr << 1;
if (incr > mask)
incr ^= mp->ma_poly;
} }
} }
...@@ -272,25 +263,20 @@ mappingresize(mp) ...@@ -272,25 +263,20 @@ mappingresize(mp)
mappingobject *mp; mappingobject *mp;
{ {
register int oldsize = mp->ma_size; register int oldsize = mp->ma_size;
register int newsize; register int newsize, newpoly;
register mappingentry *oldtable = mp->ma_table; register mappingentry *oldtable = mp->ma_table;
register mappingentry *newtable; register mappingentry *newtable;
register mappingentry *ep; register mappingentry *ep;
register int i; register int i;
newsize = mp->ma_size; newsize = mp->ma_size;
for (i = 0; ; i++) { for (i = 0, newsize = MINSIZE; ; i++, newsize <<= 1) {
if (primes[i] <= 0) { if (i > sizeof(polys)/sizeof(polys[0])) {
/* Ran out of primes */ /* Ran out of polynomials */
err_nomem(); err_nomem();
return -1; return -1;
} }
if (primes[i] > mp->ma_used*2) { if (newsize > mp->ma_used*2) {
newsize = primes[i]; newpoly = polys[i];
if (newsize != primes[i]) {
/* Integer truncation */
err_nomem();
return -1;
}
break; break;
} }
} }
...@@ -300,6 +286,7 @@ mappingresize(mp) ...@@ -300,6 +286,7 @@ mappingresize(mp)
return -1; return -1;
} }
mp->ma_size = newsize; mp->ma_size = newsize;
mp->ma_poly = newpoly;
mp->ma_table = newtable; mp->ma_table = newtable;
mp->ma_fill = 0; mp->ma_fill = 0;
mp->ma_used = 0; mp->ma_used = 0;
......
...@@ -42,21 +42,47 @@ PERFORMANCE OF THIS SOFTWARE. ...@@ -42,21 +42,47 @@ PERFORMANCE OF THIS SOFTWARE.
/* /*
Table of primes suitable as keys, in ascending order. * MINSIZE is the minimum size of a mapping.
The first line are the largest primes less than some powers of two, */
the second line is the largest prime less than 6000,
the third line is a selection from Knuth, Vol. 3, Sec. 6.1, Table 1, #define MINSIZE 4
and the next three lines were suggested by Steve Kirsch.
The final value is a sentinel. /*
Table of irreducible polynomials to efficiently cycle through
GF(2^n)-{0}, 2<=n<=30.
*/ */
static long primes[] = { static long polys[] = {
3, 7, 13, 31, 61, 127, 251, 509, 1021, 2017, 4093, 4 + 3,
5987, 8 + 3,
9551, 15683, 19609, 31397, 16 + 3,
65521L, 131071L, 262139L, 524287L, 1048573L, 2097143L, 32 + 5,
4194301L, 8388593L, 16777213L, 33554393L, 67108859L, 64 + 3,
134217689L, 268435399L, 536870909L, 1073741789L, 128 + 3,
0 256 + 29,
512 + 17,
1024 + 9,
2048 + 5,
4096 + 83,
8192 + 27,
16384 + 43,
32768 + 3,
65536 + 45,
131072 + 9,
262144 + 39,
524288 + 39,
1048576 + 9,
2097152 + 5,
4194304 + 3,
8388608 + 33,
16777216 + 27,
33554432 + 9,
67108864 + 71,
134217728 + 39,
/* Not verified by Tim P: */
268435456 + 3,
536870912 + 5,
1073741824 + 3,
0
}; };
/* Object used as dummy key to fill deleted entries */ /* Object used as dummy key to fill deleted entries */
...@@ -87,6 +113,7 @@ typedef struct { ...@@ -87,6 +113,7 @@ typedef struct {
int ma_fill; int ma_fill;
int ma_used; int ma_used;
int ma_size; int ma_size;
int ma_poly;
mappingentry *ma_table; mappingentry *ma_table;
} mappingobject; } mappingobject;
...@@ -103,6 +130,7 @@ newmappingobject() ...@@ -103,6 +130,7 @@ newmappingobject()
if (mp == NULL) if (mp == NULL)
return NULL; return NULL;
mp->ma_size = 0; mp->ma_size = 0;
mp->ma_poly = 0;
mp->ma_table = NULL; mp->ma_table = NULL;
mp->ma_fill = 0; mp->ma_fill = 0;
mp->ma_used = 0; mp->ma_used = 0;
...@@ -111,9 +139,12 @@ newmappingobject() ...@@ -111,9 +139,12 @@ newmappingobject()
/* /*
The basic lookup function used by all operations. The basic lookup function used by all operations.
This is essentially Algorithm D from Knuth Vol. 3, Sec. 6.4. This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
Open addressing is preferred over chaining since the link overhead for Open addressing is preferred over chaining since the link overhead for
chaining would be substantial (100% with typical malloc overhead). chaining would be substantial (100% with typical malloc overhead).
However, instead of going through the table at constant steps, we cycle
through the values of GF(2^n)-{0}. This avoids modulo computations, being
much cheaper on RISC machines, without leading to clustering.
First a 32-bit hash value, 'sum', is computed from the key string. First a 32-bit hash value, 'sum', is computed from the key string.
The first character is added an extra time shifted by 8 to avoid hashing The first character is added an extra time shifted by 8 to avoid hashing
...@@ -121,10 +152,11 @@ single-character keys (often heavily used variables) too close together. ...@@ -121,10 +152,11 @@ single-character keys (often heavily used variables) too close together.
All arithmetic on sum should ignore overflow. All arithmetic on sum should ignore overflow.
The initial probe index is then computed as sum mod the table size. The initial probe index is then computed as sum mod the table size.
Subsequent probe indices are incr apart (mod table size), where incr Subsequent probe indices use the values of x^i in GF(2^n) as an offset,
is also derived from sum, with the additional requirement that it is where x is a root. The initial value is derived from sum, too.
relative prime to the table size (i.e., 1 <= incr < size, since the size
is a prime number). My choice for incr is somewhat arbitrary. (This version is due to Reimer Behrends, some ideas are also due to
Jyrki Alakuijala.)
*/ */
static mappingentry *lookmapping PROTO((mappingobject *, object *, long)); static mappingentry *lookmapping PROTO((mappingobject *, object *, long));
static mappingentry * static mappingentry *
...@@ -133,97 +165,56 @@ lookmapping(mp, key, hash) ...@@ -133,97 +165,56 @@ lookmapping(mp, key, hash)
object *key; object *key;
long hash; long hash;
{ {
/* Optimizations based on observations by Jyrki Alakuijala register int i;
(paraphrased): register unsigned incr;
register unsigned long sum = (unsigned long) hash;
- This routine is very heavily used, so should be AFAP register mappingentry *freeslot = NULL;
(As Fast As Possible). register int mask = mp->ma_size-1;
register mappingentry *ep = &mp->ma_table[i];
- Most of the time, the first try is a hit or a definite /* We must come up with (i, incr) such that 0 <= i < ma_size
miss; so postpone the calculation of incr until we know the and 0 < incr < ma_size and both are a function of hash */
first try was a miss. i = (~sum) & mask;
 /* We use ~sum instead of sum, as degenerate hash functions, such
- Write the loop twice, so we can move the test for as for ints <sigh>, can have lots of leading zeros. It's not
freeslot==NULL out of the loop. really a performance risk, but better safe than sorry. */
ep = &mp->ma_table[i];
- Write the loop using pointer increments and comparisons if (ep->me_key == NULL)
rather than using an integer loop index.
Note that it behooves the compiler to calculate the values
of incr*sizeof(*ep) outside the loops and use this in the
increment of ep. I've reduced the number of register
variables to the two most obvious candidates.
*/
register mappingentry *ep;
mappingentry *end;
register object *ekey;
mappingentry *freeslot;
unsigned long sum;
int incr;
int size;
ep = &mp->ma_table[(unsigned long)hash%mp->ma_size];
ekey = ep->me_key;
if (ekey == NULL)
return ep; return ep;
#ifdef INTERN_STRINGS if (ep->me_key == dummy)
{
object *ikey;
if (is_stringobject(key) &&
(ikey = ((stringobject *)key)->ob_sinterned) != NULL)
key = ikey;
}
#endif
if (ekey == dummy)
freeslot = ep; freeslot = ep;
else { else if (ep->me_key == key ||
if (ekey == key) (ep->me_hash == hash && cmpobject(ep->me_key, key) == 0)) {
return ep; return ep;
if (ep->me_hash == hash && cmpobject(ekey, key) == 0)
return ep;
freeslot = NULL;
} }
/* Derive incr from i, just to make it more arbitrary. Note that
size = mp->ma_size; incr must not be 0, or we will get into an infinite loop.*/
sum = hash; incr = i << 1;
do { if (!incr)
sum += sum + sum + 1; incr = mask;
incr = sum % size; if (incr > mask) /* Cycle through GF(2^n)-{0} */
} while (incr == 0); incr ^= mp->ma_poly; /* This will implicitly clear the
highest bit */
end = mp->ma_table + size; for (;;) {
ep = &mp->ma_table[(i+incr)&mask];
if (freeslot == NULL) { if (ep->me_key == NULL) {
for (;;) { if (freeslot != NULL)
ep += incr; return freeslot;
if (ep >= end) else
ep -= size;
ekey = ep->me_key;
if (ekey == NULL)
return ep; return ep;
if (ekey == dummy) { }
if (ep->me_key == dummy) {
if (freeslot == NULL)
freeslot = ep; freeslot = ep;
break;
}
if (ekey == key || (ep->me_hash == hash &&
cmpobject(ekey, key) == 0))
return ep;
} }
} else if (ep->me_key == key ||
(ep->me_hash == hash &&
for (;;) { cmpobject(ep->me_key, key) == 0)) {
ep += incr;
if (ep >= end)
ep -= size;
ekey = ep->me_key;
if (ekey == NULL)
return freeslot;
if (ekey == key ||
(ekey != dummy &&
ep->me_hash == hash && cmpobject(ekey, key) == 0))
return ep; return ep;
}
/* Cycle through GF(2^n)-{0} */
incr = incr << 1;
if (incr > mask)
incr ^= mp->ma_poly;
} }
} }
...@@ -272,25 +263,20 @@ mappingresize(mp) ...@@ -272,25 +263,20 @@ mappingresize(mp)
mappingobject *mp; mappingobject *mp;
{ {
register int oldsize = mp->ma_size; register int oldsize = mp->ma_size;
register int newsize; register int newsize, newpoly;
register mappingentry *oldtable = mp->ma_table; register mappingentry *oldtable = mp->ma_table;
register mappingentry *newtable; register mappingentry *newtable;
register mappingentry *ep; register mappingentry *ep;
register int i; register int i;
newsize = mp->ma_size; newsize = mp->ma_size;
for (i = 0; ; i++) { for (i = 0, newsize = MINSIZE; ; i++, newsize <<= 1) {
if (primes[i] <= 0) { if (i > sizeof(polys)/sizeof(polys[0])) {
/* Ran out of primes */ /* Ran out of polynomials */
err_nomem(); err_nomem();
return -1; return -1;
} }
if (primes[i] > mp->ma_used*2) { if (newsize > mp->ma_used*2) {
newsize = primes[i]; newpoly = polys[i];
if (newsize != primes[i]) {
/* Integer truncation */
err_nomem();
return -1;
}
break; break;
} }
} }
...@@ -300,6 +286,7 @@ mappingresize(mp) ...@@ -300,6 +286,7 @@ mappingresize(mp)
return -1; return -1;
} }
mp->ma_size = newsize; mp->ma_size = newsize;
mp->ma_poly = newpoly;
mp->ma_table = newtable; mp->ma_table = newtable;
mp->ma_fill = 0; mp->ma_fill = 0;
mp->ma_used = 0; mp->ma_used = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment