Commit e59acd52 authored by Rusty Russell's avatar Rusty Russell

intmap: reimplement so that intmap_after works.

A critbit tree is a binary tree which keeps branches for each bit
which differs in the leaves.  It's a simple data structure, but not
entirely simple to implement the primitives people expect, as this bus
shows.

The bug: I added an open iterator, and intmap_after_ for a random
value would sometimes return the wrong node.

Cause: we don't know what the prefix is as we iterate, so by only
testing the critbits in the tree, we can end up in the wrong place.
This is only a problem if the value isn't in the (sub)tree, but this
can easily happen even with contiguous trees should deletion occur.
You can see an example in the next patch, which adds a test.

After finding a bug in my intmap_after() routine, I went searching for
other implementations to see how they handled it.  Most didn't provide
an open-ended iterator like this, relying on callback iterators which
don't allow deletion.  Gah!

The exception was https://github.com/blynn/blt/blob/master/blt.c#L179
which implements blt_ceil() which does this (if you add one to the
key, at least).  However, it does it by effectively finding a node,
using that to derive the prefix, then walking down the tree again.
That's pretty suboptimal.

There are basically two choices if you want an efficient after()
operation: to reimplement this approach with some optimizations
(ie. keep branches as we descend, and when we get to the bottom and
know the prefix, we know which branch to go down), or keep the bits
which got to each node.

The latter is more optimal, but less generally useful: for bit
strings, for example, we could keep the bits in common on each node,
rather than storing the entire string at the bottom.  But in practice
you'd be doing allocations to re-create the index if the caller wanted
it.

However, in this implementation our keys are 64 bits only, and we
already use a u8 for the bit number: using a 64-bit value there
consumes no more space (thanks to alignment).  We can store the
critbit by using the prefix capped by a bit: 0b10000...0000 means
no prefix and highest bit is the critbit, and 0bxxxxx1000...000
means the prefix is xxxxxx and the critbit is the 6th highest bit.

The penalty is that iteration 70% slower.  It's still pretty fast
though.

Before:
$ for i in `seq 5`; do ./speed 10000000; done | stats
10000000,random generation (nsec),3-4(3.2+/-0.4)
10000000,critbit insert (nsec),1530-1751(1633.2+/-80)
10000000,critbit successful lookup (nsec),1723-1993(1806.8+/-97)
10000000,critbit failed lookup (nsec),1763-2104(1933.6+/-1.3e+02)
10000000,critbit iteration (nsec),208-266(242.2+/-19)
10000000,critbit memory (bytes),48
10000000,critbit delete (nsec),1747-1861(1803.8+/-42)
10000000,critbit consecutive iteration (nsec),182-228(210+/-18)

After:
10000000,random generation (nsec),3-4(3.2+/-0.4)
10000000,critbit insert (nsec),1533-1699(1628+/-65)
10000000,critbit successful lookup (nsec),1831-2104(1972.4+/-1e+02)
10000000,critbit failed lookup (nsec),1850-2152(2008.2+/-1.1e+02)
10000000,critbit iteration (nsec),304-324(312.8+/-7.5)
10000000,critbit memory (bytes),48
10000000,critbit delete (nsec),1617-1872(1752+/-99)
10000000,critbit consecutive iteration (nsec),303-318(311+/-5.4)
Signed-off-by: default avatarRusty Russell <rusty@rustcorp.com.au>
parent 82229812
...@@ -22,7 +22,7 @@ int main(int argc, char *argv[]) ...@@ -22,7 +22,7 @@ int main(int argc, char *argv[])
return 1; return 1;
if (strcmp(argv[1], "depends") == 0) { if (strcmp(argv[1], "depends") == 0) {
printf("ccan/ilog\n" printf("ccan/bitops\n"
"ccan/short_types\n" "ccan/short_types\n"
"ccan/str\n" "ccan/str\n"
"ccan/tcon\n" "ccan/tcon\n"
......
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
struct node { struct node {
/* These point to strings or nodes. */ /* These point to strings or nodes. */
struct intmap child[2]; struct intmap child[2];
/* The bit where these children differ (0 == lsb) */ /* Encoding both prefix and critbit: 1 is appended to prefix. */
uint8_t bit_num; intmap_index_t prefix_and_critbit;
}; };
static void update_span(const void *p, size_t s, uintptr_t *min, uintptr_t *max) static void update_span(const void *p, size_t s, uintptr_t *min, uintptr_t *max)
......
/* CC0 license (public domain) - see LICENSE file for details */ /* CC0 license (public domain) - see LICENSE file for details */
/* This code is based on ccan/strmap.c. */ /* This code is based on ccan/strmap.c. */
#include <ccan/bitops/bitops.h>
#include <ccan/intmap/intmap.h> #include <ccan/intmap/intmap.h>
#include <ccan/short_types/short_types.h> #include <ccan/short_types/short_types.h>
#include <ccan/str/str.h> #include <ccan/str/str.h>
#include <ccan/ilog/ilog.h>
#include <assert.h> #include <assert.h>
#include <stdlib.h> #include <stdlib.h>
#include <errno.h> #include <errno.h>
...@@ -11,28 +11,38 @@ ...@@ -11,28 +11,38 @@
struct node { struct node {
/* These point to strings or nodes. */ /* These point to strings or nodes. */
struct intmap child[2]; struct intmap child[2];
/* The bit where these children differ (0 == lsb) */ /* Encoding both prefix and critbit: 1 is appended to prefix. */
u8 bit_num; intmap_index_t prefix_and_critbit;
}; };
/* Closest member to this in a non-empty map. */ static int critbit(const struct intmap *n)
static struct intmap *closest(struct intmap *n, intmap_index_t index)
{ {
/* Anything with NULL value is a node. */ return bitops_ls64(n->u.n->prefix_and_critbit);
while (!n->v) {
u8 direction = (index >> n->u.n->bit_num) & 1;
n = &n->u.n->child[direction];
}
return n;
} }
void *intmap_get_(const struct intmap *map, intmap_index_t index) static intmap_index_t prefix_mask(int critbit)
{ {
struct intmap *n; /* Mask does not include critbit itself, but can't shift by critbit+1 */
return -2ULL << critbit;
}
static intmap_index_t prefix_and_critbit(intmap_index_t v, int n)
{
intmap_index_t critbit = ((intmap_index_t)1 << n);
return (v & ~(critbit - 1)) | critbit;
}
void *intmap_get_(const struct intmap *map, intmap_index_t index)
{
/* Not empty map? */ /* Not empty map? */
if (!intmap_empty_(map)) { if (!intmap_empty_(map)) {
n = closest((struct intmap *)map, index); const struct intmap *n = map;
/* Anything with NULL value is a node. */
while (!n->v) {
/* FIXME: compare cmp prefix, if not equal, ENOENT */
u8 direction = (index >> critbit(n)) & 1;
n = &n->u.n->child[direction];
}
if (index == n->u.i) if (index == n->u.i)
return n->v; return n->v;
} }
...@@ -40,36 +50,18 @@ void *intmap_get_(const struct intmap *map, intmap_index_t index) ...@@ -40,36 +50,18 @@ void *intmap_get_(const struct intmap *map, intmap_index_t index)
return NULL; return NULL;
} }
bool intmap_add_(struct intmap *map, intmap_index_t index, const void *value) static bool split_node(struct intmap *n, intmap_index_t nodeindex,
intmap_index_t index, const void *value)
{ {
struct intmap *n;
struct node *newn; struct node *newn;
u8 bit_num, new_dir; int new_dir;
assert(value);
/* Empty map? */
if (intmap_empty_(map)) {
map->u.i = index;
map->v = (void *)value;
return true;
}
/* Find closest existing member. */
n = closest(map, index);
/* Find highest bit where they differ. */ /* Find highest bit where they differ. */
bit_num = ilog64(n->u.i ^ index); unsigned int critbit = bitops_hs64(nodeindex ^ index);
if (bit_num == 0) { assert(critbit < CHAR_BIT*sizeof(index));
errno = EEXIST;
return false;
}
bit_num--;
assert(bit_num < CHAR_BIT*sizeof(index));
/* Which direction do we go at this bit? */ /* Which direction do we go at this bit? */
new_dir = (index >> bit_num) & 1; new_dir = (index >> critbit) & 1;
/* Allocate new node. */ /* Allocate new node. */
newn = malloc(sizeof(*newn)); newn = malloc(sizeof(*newn));
...@@ -77,27 +69,48 @@ bool intmap_add_(struct intmap *map, intmap_index_t index, const void *value) ...@@ -77,27 +69,48 @@ bool intmap_add_(struct intmap *map, intmap_index_t index, const void *value)
errno = ENOMEM; errno = ENOMEM;
return false; return false;
} }
newn->bit_num = bit_num; newn->prefix_and_critbit = prefix_and_critbit(index, critbit);
newn->child[new_dir].v = (void *)value; newn->child[new_dir].v = (void *)value;
newn->child[new_dir].u.i = index; newn->child[new_dir].u.i = index;
newn->child[!new_dir] = *n;
n->u.n = newn;
n->v = NULL;
return true;
}
bool intmap_add_(struct intmap *map, intmap_index_t index, const void *value)
{
struct intmap *n;
assert(value);
/* Empty map? */
if (intmap_empty_(map)) {
map->u.i = index;
map->v = (void *)value;
return true;
}
/* Find where to insert: not closest, but first which differs! */
n = map; n = map;
/* Anything with NULL value is a node. */
while (!n->v) { while (!n->v) {
u8 direction; int crit = critbit(n);
intmap_index_t mask = prefix_mask(crit);
u8 direction = (index >> crit) & 1;
/* Subtle: bit numbers are "backwards" for comparison */ if ((index & mask) != (n->u.n->prefix_and_critbit & mask))
if (n->u.n->bit_num < bit_num) return split_node(n, n->u.n->prefix_and_critbit & mask,
break; index, value);
direction = (index >> n->u.n->bit_num) & 1;
n = &n->u.n->child[direction]; n = &n->u.n->child[direction];
} }
newn->child[!new_dir] = *n; if (index == n->u.i) {
n->u.n = newn; errno = EEXIST;
n->v = NULL; return false;
return true; }
return split_node(n, n->u.i, index, value);
} }
void *intmap_del_(struct intmap *map, intmap_index_t index) void *intmap_del_(struct intmap *map, intmap_index_t index)
...@@ -116,8 +129,9 @@ void *intmap_del_(struct intmap *map, intmap_index_t index) ...@@ -116,8 +129,9 @@ void *intmap_del_(struct intmap *map, intmap_index_t index)
n = map; n = map;
/* Anything with NULL value is a node. */ /* Anything with NULL value is a node. */
while (!n->v) { while (!n->v) {
/* FIXME: compare cmp prefix, if not equal, ENOENT */
parent = n; parent = n;
direction = (index >> n->u.n->bit_num) & 1; direction = (index >> critbit(n)) & 1;
n = &n->u.n->child[direction]; n = &n->u.n->child[direction];
} }
...@@ -163,38 +177,58 @@ void *intmap_first_(const struct intmap *map, intmap_index_t *indexp) ...@@ -163,38 +177,58 @@ void *intmap_first_(const struct intmap *map, intmap_index_t *indexp)
void *intmap_after_(const struct intmap *map, intmap_index_t *indexp) void *intmap_after_(const struct intmap *map, intmap_index_t *indexp)
{ {
const struct intmap *n, *prev = NULL; const struct intmap *n, *prev = NULL;
intmap_index_t index = (*indexp) + 1;
/* Special case of overflow */
if (index == 0)
goto none_left;
/* Special case of empty map */ /* Special case of empty map */
if (intmap_empty_(map)) { if (intmap_empty_(map))
errno = ENOENT; goto none_left;
return NULL;
}
/* Follow down, track the last place where we could have set a bit /* Follow down, until prefix differs. */
* instead of clearing it: this is the higher alternative tree. */
n = map; n = map;
while (!n->v) { while (!n->v) {
u8 direction = (*indexp >> n->u.n->bit_num) & 1; int crit = critbit(n);
u8 direction;
intmap_index_t prefix, idx;
idx = (index >> crit);
direction = idx & 1;
/* Leave critbit in place: we can't shift by 64 anyway */
idx |= 1;
prefix = n->u.n->prefix_and_critbit >> crit;
/* If this entire tree is greater than index, take first */
if (idx < prefix)
return intmap_first_(n, indexp);
/* If this entire tree is less than index, we're past it. */
else if (idx > prefix)
goto try_greater_tree;
/* Remember greater tree for backtracking */
if (!direction) if (!direction)
prev = n; prev = n;
n = &n->u.n->child[direction]; n = &n->u.n->child[direction];
} }
/* Found a successor? */ /* Found a successor? */
if (n->u.i > *indexp) { if (n->u.i >= index) {
errno = 0; errno = 0;
*indexp = n->u.i; *indexp = n->u.i;
return n->v; return n->v;
} }
/* Nowhere to go back up to? */ try_greater_tree:
if (!prev) { /* If we ever took a lesser branch, go back to greater branch */
errno = ENOENT; if (prev)
return NULL; return intmap_first_(&prev->u.n->child[1], indexp);
}
/* Get first one from that other branch. */ none_left:
return intmap_first_(&prev->u.n->child[1], indexp); errno = ENOENT;
return NULL;
} }
void *intmap_last_(const struct intmap *map, intmap_index_t *indexp) void *intmap_last_(const struct intmap *map, intmap_index_t *indexp)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment