Commit 581af902 authored by Rusty Russell's avatar Rusty Russell

Hashtable routines.

parent 021e03c4
#include <string.h>
/**
* hashtable - hash table routines
*
* A hash table is an efficient structure for looking up keys. This version
* grows with usage and allows efficient deletion.
*
* Caveat: pointers placed into the hash must be aligned to 64 bits.
* ie. from malloc or because the structure they point to is aligned.
* An assert will be raised if this isn't true.
*
* Example:
* #include <ccan/hashtable/hashtable.h>
* #include <ccan/hash/hash.h>
* #include <stdio.h>
* #include <err.h>
* #include <string.h>
*
* struct name_to_digit {
* const char *name;
* unsigned int val;
* };
*
* static struct name_to_digit map[] = {
* { "zero", 0},
* { "one", 1 },
* { "two", 2 },
* { "three", 3 },
* { "four", 4 },
* { "five", 5 },
* { "six", 6 },
* { "seven", 7 },
* { "eight", 8 },
* { "nine", 9 }
* };
*
* // Wrapper for rehash function pointer.
* static unsigned long rehash(const void *e, void *unused)
* {
* return hash_string(((struct name_to_digit *)e)->name);
* }
*
* // Comparison function.
* static bool streq(const void *e, void *string)
* {
* return strcmp(((struct name_to_digit *)e)->name, string) == 0;
* }
*
* // We let them add their own aliases, eg. --alias=v=5
* static void add_alias(struct hashtable *ht, const char *alias)
* {
* char *eq;
* struct name_to_digit *n;
*
* n = malloc(sizeof(*n));
* n->name = strdup(alias);
*
* eq = strchr(n->name, '=');
* if (!eq || ((n->val = atoi(eq+1)) == 0 && !strcmp(eq+1, "0")))
* errx(1, "Usage: --alias=<name>=<value>");
* *eq = '\0';
* hashtable_add(ht, hash_string(n->name), n);
* }
*
* int main(int argc, char *argv[])
* {
* struct hashtable *ht;
* unsigned int i;
* unsigned long val;
*
* if (argc < 2)
* errx(1, "Usage: %s [--alias=<name>=<val>]... <str>...",
* argv[0]);
*
* // Create and populate hash table.
* ht = hashtable_new(rehash, NULL);
* for (i = 0; i < sizeof(map)/sizeof(map[0]); i++)
* hashtable_add(ht, hash_string(map[i].name), &map[i]);
*
* // Add any aliases to the hash table.
* for (i = 1; i < argc; i++) {
* if (!strncmp(argv[i], "--alias=", strlen("--alias=")))
* add_alias(ht, argv[i] + strlen("--alias="));
* else
* break;
* }
*
* // Find the other args in the hash table.
* for (val = 0; i < argc; i++) {
* struct name_to_digit *n;
* n = hashtable_find(ht, hash_string(argv[i]),
* streq, argv[i]);
* if (!n)
* errx(1, "Invalid digit name %s", argv[i]);
* // Append it to the value we are building up.
* val *= 10;
* val += n->val;
* }
* printf("%lu\n", val);
* return 0;
* }
*
* Licence: GPLv2 (or later)
*/
int main(int argc, char *argv[])
{
if (argc != 2)
return 1;
if (strcmp(argv[1], "depends") == 0) {
return 0;
}
return 1;
}
#include <ccan/hashtable/hashtable.h>
#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
/* This means a struct hashtable takes at least 512 bytes / 1k (32/64 bits). */
#define HASHTABLE_BASE_BITS 7
/* We use 0x1 as deleted marker. */
#define HASHTABLE_DELETED ((void *)0x1)
/* Number of redundant bits in a pointer. */
#define PTR_STEAL_BITS 3
static inline uintptr_t get_extra_ptr_bits(const void *p)
{
return (uintptr_t)p & (((uintptr_t)1 << PTR_STEAL_BITS)-1);
}
static inline void *get_raw_ptr(const void *p)
{
return (void *)((uintptr_t)p & ~(((uintptr_t)1 << PTR_STEAL_BITS)-1));
}
static inline void *make_ptr(const void *p, uintptr_t bits)
{
return (void *)((uintptr_t)p | bits);
}
struct hashtable {
unsigned long (*rehash)(const void *elem, void *priv);
void *priv;
unsigned int bits;
unsigned long elems, max;
void **table;
};
static inline bool ptr_is_valid(const void *e)
{
return e > HASHTABLE_DELETED;
}
struct hashtable *hashtable_new(unsigned long (*rehash)(const void *elem,
void *priv),
void *priv)
{
struct hashtable *ht = malloc(sizeof(struct hashtable));
if (ht) {
ht->bits = HASHTABLE_BASE_BITS;
ht->rehash = rehash;
ht->priv = priv;
ht->elems = 0;
ht->max = (1 << ht->bits) * 3 / 4;
ht->table = calloc(1 << ht->bits, sizeof(void *));
if (!ht->table) {
free(ht);
ht = NULL;
}
}
return ht;
}
void hashtable_free(struct hashtable *ht)
{
free(ht->table);
free(ht);
}
static unsigned long hash_bucket(const struct hashtable *ht,
unsigned long h, unsigned long *ptrbits)
{
*ptrbits = (h >> ht->bits) & (((uintptr_t)1 << PTR_STEAL_BITS)-1);
return h & ((1 << ht->bits)-1);
}
/* This does not expand the hash table, that's up to caller. */
static void ht_add(struct hashtable *ht, const void *new, unsigned long h)
{
unsigned long i, h2;
i = hash_bucket(ht, h, &h2);
while (ptr_is_valid(ht->table[i])) {
i = (i + 1) & ((1 << ht->bits)-1);
}
ht->table[i] = make_ptr(new, h2);
}
static bool double_table(struct hashtable *ht)
{
unsigned int i;
size_t oldnum = (size_t)1 << ht->bits;
void **oldtable, *e;
oldtable = ht->table;
ht->table = calloc(1 << (ht->bits+1), sizeof(void *));
if (!ht->table) {
ht->table = oldtable;
return false;
}
ht->bits++;
ht->max *= 2;
for (i = 0; i < oldnum; i++) {
if (ptr_is_valid(e = oldtable[i])) {
e = get_raw_ptr(e);
ht_add(ht, e, ht->rehash(e, ht->priv));
}
}
free(oldtable);
return true;
}
bool hashtable_add(struct hashtable *ht, unsigned long hash, const void *p)
{
if (ht->elems+1 > ht->max && !double_table(ht))
return false;
ht->elems++;
assert(p);
assert(((uintptr_t)p & (((uintptr_t)1 << PTR_STEAL_BITS)-1)) == 0);
ht_add(ht, p, hash);
return true;
}
/* Find bucket for an element. */
static size_t ht_find(struct hashtable *ht,
unsigned long hash,
bool (*cmp)(const void *htelem, void *cmpdata),
void *cmpdata)
{
void *e;
unsigned long i, h2;
i = hash_bucket(ht, hash, &h2);
while ((e = ht->table[i]) != NULL) {
if (e != HASHTABLE_DELETED
&& h2 == get_extra_ptr_bits(e)
&& cmp(get_raw_ptr(ht->table[i]), cmpdata))
return i;
i = (i + 1) & ((1 << ht->bits)-1);
}
return (size_t)-1;
}
/* If every one of the following buckets are DELETED (up to the next unused
one), we can actually mark them all unused. */
static void delete_run(struct hashtable *ht, unsigned int num)
{
unsigned int i, last = num + 1;
while (ht->table[last & ((1 << ht->bits)-1)]) {
if (ptr_is_valid(ht->table[last & ((1 << ht->bits)-1)]))
return;
last++;
}
for (i = num; i < last; i++)
ht->table[i & ((1 << ht->bits)-1)] = NULL;
}
void *hashtable_find(struct hashtable *ht,
unsigned long hash,
bool (*cmp)(const void *htelem, void *cmpdata),
void *cmpdata)
{
size_t i = ht_find(ht, hash, cmp, cmpdata);
if (i != (size_t)-1)
return get_raw_ptr(ht->table[i]);
return NULL;
}
static bool ptr_cmp(const void *htelem, void *cmpdata)
{
return htelem == cmpdata;
}
bool hashtable_del(struct hashtable *ht, unsigned long hash, const void *p)
{
size_t i = ht_find(ht, hash, ptr_cmp, (void *)p);
if (i == (size_t)-1)
return false;
ht->elems--;
ht->table[i] = HASHTABLE_DELETED;
delete_run(ht, i);
return true;
}
void hashtable_traverse(struct hashtable *ht, bool (*cb)(void *p, void *cbarg),
void *cbarg)
{
size_t i;
for (i = 0; i < (1 << ht->bits); i++) {
if (ptr_is_valid(ht->table[i]))
if (cb(get_raw_ptr(ht->table[i]), cbarg))
return;
}
}
#ifndef CCAN_HASHTABLE_H
#define CCAN_HASHTABLE_H
#include "config.h"
#include <stdbool.h>
struct hashtable;
/**
* hashtable_new - allocate a hash tree.
* @rehash: hash function to use for rehashing.
* @priv: private argument to @rehash function.
*/
struct hashtable *hashtable_new(unsigned long (*rehash)(const void *elem,
void *priv),
void *priv);
/**
* hashtable_free - dellocate a hash tree.
*
* This doesn't do anything to any pointers left in it.
*/
void hashtable_free(struct hashtable *);
/**
* hashtable_find - look for an object in a hash tree.
* @ht: the hashtable
* @hash: the hash value of the object you are seeking
* @cmp: a comparison function: returns true if the object is found
* @cmpdata: the data to hand as second arg to @cmp
*
* Note that you can do all the work inside the cmp function if you want to:
* hashtable_find will return @htelem if @cmp returns true.
*/
void *hashtable_find(struct hashtable *ht,
unsigned long hash,
bool (*cmp)(const void *htelem, void *cmpdata),
void *cmpdata);
/**
* hashtable_add - add an (aligned) pointer into a hash tree.
* @ht: the hashtable
* @hash: the hash value of the object
* @p: the non-NULL pointer, usually from malloc or equiv.
*
* Note that this implementation (ab)uses the lower bits of the pointer, so
* it will abort if the pointer is not aligned to 8 bytes.
*
* Also note that this can only fail due to allocation failure. Otherwise, it
* returns true.
*/
bool hashtable_add(struct hashtable *ht, unsigned long hash, const void *p);
/**
* hashtable_del - remove a pointer from a hash tree
* @ht: the hashtable
* @hash: the hash value of the object
* @p: the pointer
*
* Returns true if the pointer was found (and deleted).
*/
bool hashtable_del(struct hashtable *ht, unsigned long hash, const void *p);
/**
* hashtable_traverse - call a function on every pointer in hash tree
* @ht: the hashtable
* @cb: the callback: returns true to abort traversal.
* @cbarg: the argument to the callback
*
* Note that your traversal callback may delete any entry (it won't crash),
* but it may make the traverse unreliable.
*/
void hashtable_traverse(struct hashtable *ht, bool (*cb)(void *p, void *cbarg),
void *cbarg);
#endif /* CCAN_HASHTABLE_H */
#include <ccan/hashtable/hashtable.h>
#include <ccan/hashtable/hashtable.c>
#include <ccan/tap/tap.h>
#include <stdbool.h>
#include <string.h>
#define NUM_VALS (1 << HASHTABLE_BASE_BITS)
/* We use the number divided by two as the hash (for lots of
collisions), plus set all the higher bits so we can detect if they
don't get masked out. */
static unsigned long hash(const void *elem, void *unused)
{
unsigned long h = *(uint64_t *)elem / 2;
h |= -1UL << HASHTABLE_BASE_BITS;
return h;
}
static bool objcmp(const void *htelem, void *cmpdata)
{
return *(uint64_t *)htelem == *(uint64_t *)cmpdata;
}
static void add_vals(struct hashtable *ht,
const uint64_t val[], unsigned int num)
{
uint64_t i;
for (i = 0; i < num; i++) {
if (hashtable_find(ht, hash(&i, NULL), objcmp, &i)) {
fail("%llu already in hash", (long long)i);
return;
}
hashtable_add(ht, hash(&val[i], NULL), &val[i]);
if (hashtable_find(ht, hash(&i, NULL), objcmp, &i) != &val[i]) {
fail("%llu not added to hash", (long long)i);
return;
}
}
pass("Added %llu numbers to hash", (long long)i);
}
static void refill_vals(struct hashtable *ht,
const uint64_t val[], unsigned int num)
{
uint64_t i;
for (i = 0; i < num; i++) {
if (hashtable_find(ht, hash(&i, NULL), objcmp, &i))
continue;
hashtable_add(ht, hash(&val[i], NULL), &val[i]);
}
}
static void find_vals(struct hashtable *ht,
const uint64_t val[], unsigned int num)
{
uint64_t i;
for (i = 0; i < num; i++) {
if (hashtable_find(ht, hash(&i, NULL), objcmp, &i) != &val[i]) {
fail("%llu not found in hash", (long long)i);
return;
}
}
pass("Found %llu numbers in hash", (long long)i);
}
static void del_vals(struct hashtable *ht,
const uint64_t val[], unsigned int num)
{
uint64_t i;
for (i = 0; i < num; i++) {
if (!hashtable_del(ht, hash(&val[i], NULL), &val[i])) {
fail("%llu not deleted from hash", (long long)i);
return;
}
}
pass("Deleted %llu numbers in hash", (long long)i);
}
struct travarg {
unsigned int count;
struct hashtable *ht;
char touched[NUM_VALS];
uint64_t *val;
};
static bool count(void *p, void *cbarg)
{
struct travarg *travarg = cbarg;
travarg->count++;
travarg->touched[*(uint64_t *)p]++;
return false;
}
static bool delete_self(void *p, void *cbarg)
{
struct travarg *travarg = cbarg;
uint64_t val = *(uint64_t *)p;
travarg->count++;
travarg->touched[val]++;
return !hashtable_del(travarg->ht, hash(p, NULL), p);
}
static bool delete_next(void *p, void *cbarg)
{
struct travarg *travarg = cbarg;
uint64_t val = *(uint64_t *)p;
uint64_t *next = &travarg->val[(val + 1) % NUM_VALS];
travarg->count++;
travarg->touched[val]++;
return !hashtable_del(travarg->ht, hash(next, NULL), next);
}
static bool delete_prev(void *p, void *cbarg)
{
struct travarg *travarg = cbarg;
uint64_t val = *(uint64_t *)p;
uint64_t *prev = &travarg->val[(val - 1) % NUM_VALS];
travarg->count++;
travarg->touched[val]++;
return !hashtable_del(travarg->ht, hash(prev, NULL), prev);
}
static bool stop_halfway(void *p, void *cbarg)
{
struct travarg *travarg = cbarg;
travarg->count++;
travarg->touched[*(uint64_t *)p]++;
return (travarg->count == NUM_VALS / 2);
}
static void check_all_touched_once(struct travarg *travarg)
{
unsigned int i;
for (i = 0; i < NUM_VALS; i++) {
if (travarg->touched[i] != 1) {
fail("Value %u touched %u times",
i, travarg->touched[i]);
return;
}
}
pass("All values touched once");
}
static void check_only_touched_once(struct travarg *travarg)
{
unsigned int i;
for (i = 0; i < NUM_VALS; i++) {
if (travarg->touched[i] > 1) {
fail("Value %u touched multiple (%u) times",
i, travarg->touched[i]);
return;
}
}
pass("No value touched twice");
}
int main(int argc, char *argv[])
{
unsigned int i;
struct hashtable *ht;
uint64_t val[NUM_VALS];
struct travarg travarg;
uint64_t dne;
plan_tests(20);
for (i = 0; i < NUM_VALS; i++)
val[i] = i;
dne = i;
ht = hashtable_new(hash, NULL);
ok1(ht->max < (1 << ht->bits));
ok1(ht->bits == HASHTABLE_BASE_BITS);
/* We cannot find an entry which doesn't exist. */
ok1(!hashtable_find(ht, hash(&dne, NULL), objcmp, &dne));
/* Fill it, it should increase in size (once). */
add_vals(ht, val, NUM_VALS);
ok1(ht->bits == HASHTABLE_BASE_BITS + 1);
ok1(ht->max < (1 << ht->bits));
/* Find all. */
find_vals(ht, val, NUM_VALS);
ok1(!hashtable_find(ht, hash(&dne, NULL), objcmp, &dne));
/* Delete all. */
del_vals(ht, val, NUM_VALS);
ok1(!hashtable_find(ht, hash(&val[0], NULL), objcmp, &val[0]));
/* Refill. */
refill_vals(ht, val, NUM_VALS);
/* Traverse tests. */
travarg.ht = ht;
travarg.val = val;
memset(travarg.touched, 0, sizeof(travarg.touched));
travarg.count = 0;
/* Traverse. */
hashtable_traverse(ht, count, &travarg);
ok1(travarg.count == NUM_VALS);
check_all_touched_once(&travarg);
memset(travarg.touched, 0, sizeof(travarg.touched));
travarg.count = 0;
hashtable_traverse(ht, stop_halfway, &travarg);
ok1(travarg.count == NUM_VALS / 2);
check_only_touched_once(&travarg);
memset(travarg.touched, 0, sizeof(travarg.touched));
travarg.count = 0;
i = 0;
/* Delete until we make no more progress. */
for (;;) {
hashtable_traverse(ht, delete_self, &travarg);
if (travarg.count == i || travarg.count > NUM_VALS)
break;
i = travarg.count;
}
ok1(travarg.count == NUM_VALS);
check_all_touched_once(&travarg);
memset(travarg.touched, 0, sizeof(travarg.touched));
travarg.count = 0;
refill_vals(ht, val, NUM_VALS);
hashtable_traverse(ht, delete_next, &travarg);
ok1(travarg.count <= NUM_VALS);
check_only_touched_once(&travarg);
memset(travarg.touched, 0, sizeof(travarg.touched));
travarg.count = 0;
refill_vals(ht, val, NUM_VALS);
hashtable_traverse(ht, delete_prev, &travarg);
ok1(travarg.count <= NUM_VALS);
check_only_touched_once(&travarg);
return exit_status();
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment