Commit 6804501c authored by Rusty Russell

tdb2: change to using a hash tree.

As the locking issues with enlarging a hash were so nasty, we switch to a
tree structure for the entries: a hash table which expands to point to
sub-hashes when it fills.

This means we no longer have a 'volatile' header: the top hash cannot move.
In fact, we no longer store a copy of the header in the tdb_context; we only
need hash_seed.

New helper functions for accessing writable areas and committing the results
(if they had to be copied).  New debug check to make sure we don't hold a
direct-access pointer while doing something which could cause us to unmap/remap.

Find becomes more complicated: we need to track where we found (or didn't
find) an entry so we can easily add/delete it.

Traverse becomes more complicated: we need to track where we were in the
hash tree.
parent 32710c91
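
To illustrate the scheme, here is a minimal sketch of the lookup walk it implies, using the sizes introduced in private.h below (1024 top-level entries, 64 per sub-level, so 64 bits are consumed after 9 sub-levels). The node type is hypothetical and the 8-entry groups are elided; the real code walks disk offsets with stolen marker bits, not pointers.

#include <stdbool.h>
#include <stdint.h>

#define TOPLEVEL_BITS 10	/* 1 << 10 = 1024 top-level entries */
#define SUBLEVEL_BITS 6		/* 1 << 6 = 64 entries per sub-hash */

/* Hypothetical in-memory node; a real entry is a tdb_off_t with
 * stolen bits saying "this points to a sub-hash". */
struct node {
	bool is_subhash;
	union {
		struct node *sub;	/* sub-hash: 64 entries */
		uint64_t rec_off;	/* record offset, 0 if empty */
	} u;
};

/* Consume hash bits from the top, as tdb2 does, so that byte-range
 * locks can cover whole subtrees. */
static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used)
{
	*used += num;
	return (h >> (64 - *used)) & ((1U << num) - 1);
}

static uint64_t lookup(const struct node *top, uint64_t h)
{
	unsigned used = 0;
	const struct node *e = &top[use_bits(h, TOPLEVEL_BITS, &used)];

	while (e->is_subhash)	/* descend until we reach a leaf entry */
		e = &e->u.sub[use_bits(h, SUBLEVEL_BITS, &used)];
	return e->u.rec_off;	/* caller must still compare real keys */
}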
......@@ -510,10 +510,10 @@ int set_header(struct tdb_context *tdb,
{
uint64_t keybits = (fls64(keylen) + 1) / 2;
/* Use top bits of hash, so it's independent of hash table size. */
/* Use bottom bits of hash, so it's independent of hash table size. */
rec->magic_and_meta
= zone_bits
| ((hash >> 59) << 6)
| ((hash & ((1 << 5)-1)) << 6)
| ((actuallen - (keylen + datalen)) << 11)
| (keybits << 43)
| (TDB_MAGIC << 48);
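
As an aside, the field widths of the packed magic_and_meta word can be read off the shifts above; a hedged decode sketch follows (the layout is inferred from this hunk alone, not checked against the rec_* accessors):

/* Inferred layout (sketch only):
 *   bits  0-5   zone_bits
 *   bits  6-10  bottom 5 bits of the hash
 *   bits 11-42  extra padding (actuallen - keylen - datalen)
 *   bits 43-47  keybits, i.e. (fls64(keylen) + 1) / 2
 *   bits 48-63  TDB_MAGIC
 */
static unsigned meta_zone_bits(uint64_t m) { return m & 0x3f; }
static unsigned meta_hash5(uint64_t m)     { return (m >> 6) & 0x1f; }
static uint64_t meta_padding(uint64_t m)   { return (m >> 11) & 0xffffffffULL; }
static unsigned meta_keybits(uint64_t m)   { return (m >> 43) & 0x1f; }
static unsigned meta_magic(uint64_t m)     { return (unsigned)(m >> 48); }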
......@@ -654,8 +654,8 @@ tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen,
tdb_len_t size, actual;
struct tdb_used_record rec;
/* We don't want header to change during this! */
assert(tdb->header_uptodate);
/* We can't hold pointers during this: we could unmap! */
assert(!tdb->direct_access);
size = adjust_size(keylen, datalen, growing);
......
......@@ -26,6 +26,7 @@
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "private.h"
#include <assert.h>
#include <ccan/likely/likely.h>
void tdb_munmap(struct tdb_context *tdb)
......@@ -72,6 +73,10 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe)
struct stat st;
int ret;
/* FIXME: We can't hold pointers during this: we could unmap! */
/* (We currently do this in traverse!) */
// assert(!tdb->direct_access || tdb_has_expansion_lock(tdb));
if (len <= tdb->map_size)
return 0;
if (tdb->flags & TDB_INTERNAL) {
......@@ -375,44 +380,29 @@ int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val)
return tdb_write_convert(tdb, off, &val, sizeof(val));
}
/* read a lump of data, allocating the space for it */
void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
tdb_len_t len, unsigned int prefix)
{
void *buf;
/* some systems don't like zero length malloc */
buf = malloc(len ? len : 1);
buf = malloc(prefix + len ? prefix + len : 1);
if (unlikely(!buf)) {
tdb->ecode = TDB_ERR_OOM;
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"tdb_alloc_read malloc failed len=%lld\n",
(long long)len);
} else if (unlikely(tdb->methods->read(tdb, offset, buf, len))) {
(long long)(prefix + len));
} else if (unlikely(tdb->methods->read(tdb, offset, buf+prefix, len))) {
free(buf);
buf = NULL;
}
return buf;
}
uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off)
/* read a lump of data, allocating the space for it */
void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
{
struct tdb_used_record pad, *r;
const void *key;
uint64_t klen, hash;
r = tdb_get(tdb, off, &pad, sizeof(pad));
if (!r)
/* FIXME */
return 0;
klen = rec_key_length(r);
key = tdb_access_read(tdb, off + sizeof(pad), klen, false);
if (!key)
return 0;
hash = tdb_hash(tdb, key, klen);
tdb_access_release(tdb, key);
return hash;
return _tdb_alloc_read(tdb, offset, len, 0);
}
static int fill(struct tdb_context *tdb,
......@@ -474,19 +464,57 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition)
return 0;
}
/* This is only needed for tdb_access_commit, but used everywhere to simplify. */
struct tdb_access_hdr {
tdb_off_t off;
tdb_len_t len;
bool convert;
};
const void *tdb_access_read(struct tdb_context *tdb,
tdb_off_t off, tdb_len_t len, bool convert)
{
const void *ret = NULL;
const void *ret = NULL;
if (likely(!(tdb->flags & TDB_CONVERT)))
ret = tdb_direct(tdb, off, len);
if (!ret) {
ret = tdb_alloc_read(tdb, off, len);
if (convert)
tdb_convert(tdb, (void *)ret, len);
}
struct tdb_access_hdr *hdr;
hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
if (hdr) {
ret = hdr + 1;
if (convert)
tdb_convert(tdb, (void *)ret, len);
}
} else
tdb->direct_access++;
return ret;
}
void *tdb_access_write(struct tdb_context *tdb,
tdb_off_t off, tdb_len_t len, bool convert)
{
void *ret = NULL;
if (likely(!(tdb->flags & TDB_CONVERT)))
ret = tdb_direct(tdb, off, len);
if (!ret) {
struct tdb_access_hdr *hdr;
hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
if (hdr) {
hdr->off = off;
hdr->len = len;
hdr->convert = convert;
ret = hdr + 1;
if (convert)
tdb_convert(tdb, (void *)ret, len);
}
} else
tdb->direct_access++;
return ret;
}
......@@ -495,7 +523,30 @@ void tdb_access_release(struct tdb_context *tdb, const void *p)
if (!tdb->map_ptr
|| (char *)p < (char *)tdb->map_ptr
|| (char *)p >= (char *)tdb->map_ptr + tdb->map_size)
free((void *)p);
free((struct tdb_access_hdr *)p - 1);
else
tdb->direct_access--;
}
int tdb_access_commit(struct tdb_context *tdb, void *p)
{
int ret = 0;
if (!tdb->map_ptr
|| (char *)p < (char *)tdb->map_ptr
|| (char *)p >= (char *)tdb->map_ptr + tdb->map_size) {
struct tdb_access_hdr *hdr;
hdr = (struct tdb_access_hdr *)p - 1;
if (hdr->convert)
ret = tdb_write_convert(tdb, hdr->off, p, hdr->len);
else
ret = tdb_write(tdb, hdr->off, p, hdr->len);
free(hdr);
} else
tdb->direct_access--;
return ret;
}
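
A hedged usage sketch of the pattern tdb_access_write/tdb_access_commit enable, matching the code above: if the region is mmapped you get a direct pointer and commit merely drops the direct_access count; otherwise you get a malloc'd copy behind a tdb_access_hdr and commit writes it back. Illustrative only; error handling elided.

/* Sketch: mutate an on-disk region in place via the helpers above. */
static int bump_first_byte(struct tdb_context *tdb,
			   tdb_off_t off, tdb_len_t len)
{
	unsigned char *p = tdb_access_write(tdb, off, len, false);

	if (!p)
		return -1;
	p[0]++;		/* mutate the (possibly copied) region */
	/* Direct mapping: no-op.  Copy: written back at hdr->off. */
	return tdb_access_commit(tdb, p);
}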
#if 0
......
......@@ -257,7 +257,7 @@ static int tdb_nest_lock(struct tdb_context *tdb, tdb_off_t offset, int ltype,
{
struct tdb_lock_type *new_lck;
if (offset >= TDB_HASH_LOCK_START + (1 << 30) + tdb->map_size / 8) {
if (offset >= TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE + tdb->map_size / 8) {
tdb->ecode = TDB_ERR_LOCK;
tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
"tdb_lock: invalid offset %llu ltype=%d\n",
......@@ -374,11 +374,6 @@ static int tdb_nest_unlock(struct tdb_context *tdb, tdb_off_t off, int ltype)
*/
*lck = tdb->lockrecs[--tdb->num_lockrecs];
if (tdb->num_lockrecs == 0) {
/* If we're not holding any locks, header can change. */
tdb->header_uptodate = false;
}
return ret;
}
......@@ -410,8 +405,10 @@ static int tdb_lock_gradual(struct tdb_context *tdb,
int ret;
enum tdb_lock_flags nb_flags = (flags & ~TDB_LOCK_WAIT);
if (len <= 4) {
/* Single record. Just do blocking lock. */
if (len <= 1) {
/* A length of 0 would mean a lock to end-of-file... */
assert(len != 0);
/* Single hash. Just do blocking lock. */
return tdb_brlock(tdb, ltype, off, len, flags);
}
......@@ -437,14 +434,11 @@ static int tdb_lock_gradual(struct tdb_context *tdb,
/* lock/unlock entire database. It can only be upgradable if you have some
* other way of guaranteeing exclusivity (ie. transaction write lock).
* Note that we don't lock the free chains: no one can get those locks
* without a hash chain lock first.
* The header *will be* up to date once this returns success. */
* Note that we don't lock the free chains: currently no one can get those locks
* without a hash chain lock first. */
int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
enum tdb_lock_flags flags, bool upgradable)
{
tdb_off_t hash_size;
/* FIXME: There are no locks on read-only dbs */
if (tdb->read_only) {
tdb->ecode = TDB_ERR_LOCK;
......@@ -484,11 +478,9 @@ int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
return -1;
}
/* Lock all the hash buckets. */
again:
hash_size = (1ULL << tdb->header.v.hash_bits);
if (tdb_lock_gradual(tdb, ltype, flags, TDB_HASH_LOCK_START,
hash_size)) {
TDB_HASH_LOCK_RANGE)) {
if (!(flags & TDB_LOCK_PROBE)) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"tdb_lockall hashes failed (%s)\n",
......@@ -503,12 +495,6 @@ again:
tdb->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
tdb->allrecord_lock.off = upgradable;
/* Now we re-check header, holding lock. */
if (unlikely(header_changed(tdb))) {
tdb_allrecord_unlock(tdb, ltype);
goto again;
}
/* Now check for needing recovery. */
if (unlikely(tdb_needs_recovery(tdb))) {
tdb_allrecord_unlock(tdb, ltype);
......@@ -544,8 +530,6 @@ void tdb_unlock_expand(struct tdb_context *tdb, int ltype)
/* unlock entire db */
int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype)
{
tdb_off_t hash_size;
/* FIXME: There are no locks on read-only dbs */
if (tdb->read_only) {
tdb->ecode = TDB_ERR_LOCK;
......@@ -579,11 +563,15 @@ int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype)
tdb->allrecord_lock.count = 0;
tdb->allrecord_lock.ltype = 0;
tdb->header_uptodate = false;
hash_size = (1ULL << tdb->header.v.hash_bits);
return tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START,
TDB_HASH_LOCK_RANGE);
}
return tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, hash_size);
bool tdb_has_expansion_lock(struct tdb_context *tdb)
{
return find_nestlock(tdb, TDB_EXPANSION_LOCK) != NULL
|| (tdb->flags & TDB_NOLOCK);
}
bool tdb_has_locks(struct tdb_context *tdb)
......@@ -637,18 +625,19 @@ int tdb_unlockall_read(struct tdb_context *tdb)
}
#endif
/* Returns the list we actually locked. */
tdb_off_t tdb_lock_list(struct tdb_context *tdb, uint64_t hash,
int ltype, enum tdb_lock_flags waitflag)
int tdb_lock_hashes(struct tdb_context *tdb,
tdb_off_t hash_lock,
tdb_len_t hash_range,
int ltype, enum tdb_lock_flags waitflag)
{
tdb_off_t list = hash & ((1ULL << tdb->header.v.hash_bits) - 1);
/* Header can change ONLY if we had no locks before. */
bool can_change = tdb->num_lockrecs == 0;
/* FIXME: Do this properly, using hlock_range */
unsigned lock = TDB_HASH_LOCK_START
+ (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
/* an allrecord lock allows us to avoid per-chain locks */
if (tdb->allrecord_lock.count &&
(ltype == tdb->allrecord_lock.ltype || ltype == F_RDLCK)) {
return list;
return 0;
}
if (tdb->allrecord_lock.count) {
......@@ -657,27 +646,18 @@ tdb_off_t tdb_lock_list(struct tdb_context *tdb, uint64_t hash,
"tdb_lock_list: have %s allrecordlock\n",
tdb->allrecord_lock.ltype == F_RDLCK
? "read" : "write");
return TDB_OFF_ERR;
return -1;
}
again:
if (tdb_nest_lock(tdb, TDB_HASH_LOCK_START + list, ltype, waitflag))
return TDB_OFF_ERR;
if (can_change && unlikely(header_changed(tdb))) {
tdb_off_t new = hash & ((1ULL << tdb->header.v.hash_bits) - 1);
if (new != list) {
tdb_nest_unlock(tdb, TDB_HASH_LOCK_START+list, ltype);
list = new;
goto again;
}
}
return list;
return tdb_nest_lock(tdb, lock, ltype, waitflag);
}
int tdb_unlock_list(struct tdb_context *tdb, tdb_off_t list, int ltype)
int tdb_unlock_hashes(struct tdb_context *tdb,
tdb_off_t hash_lock,
tdb_len_t hash_range, int ltype)
{
list &= ((1ULL << tdb->header.v.hash_bits) - 1);
unsigned lock = TDB_HASH_LOCK_START
+ (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
/* an allrecord lock allows us to avoid per-chain locks */
if (tdb->allrecord_lock.count) {
......@@ -689,9 +669,9 @@ int tdb_unlock_list(struct tdb_context *tdb, tdb_off_t list, int ltype)
return -1;
}
return 0;
} else {
return tdb_nest_unlock(tdb, TDB_HASH_LOCK_START + list, ltype);
}
return tdb_nest_unlock(tdb, lock, ltype);
}
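
A worked example of the mapping both functions above use: the lock offset is TDB_HASH_LOCK_START plus the top TDB_HASH_LOCK_RANGE_BITS (30) bits of the 64-bit hash, so one contiguous fcntl byte range covers an entire subtree of the hash. A self-contained sketch, with the constants copied from private.h:

#include <assert.h>
#include <stdint.h>

#define TDB_HASH_LOCK_START	 3
#define TDB_HASH_LOCK_RANGE_BITS 30

static uint64_t hash_lock_off(uint64_t h)
{
	return TDB_HASH_LOCK_START + (h >> (64 - TDB_HASH_LOCK_RANGE_BITS));
}

int main(void)
{
	uint64_t h = 0xABCDEF0123456789ULL;

	/* Flipping any of the low 34 bits leaves the lock unchanged... */
	assert(hash_lock_off(h) == hash_lock_off(h ^ ((1ULL << 34) - 1)));
	/* ...but flipping a top bit selects a different lock. */
	assert(hash_lock_off(h) != hash_lock_off(h ^ (1ULL << 63)));
	return 0;
}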
/* Hash locks use TDB_HASH_LOCK_START + the next 30 bits.
......@@ -701,7 +681,7 @@ int tdb_unlock_list(struct tdb_context *tdb, tdb_off_t list, int ltype)
*/
static tdb_off_t free_lock_off(tdb_off_t b_off)
{
return TDB_HASH_LOCK_START + (1 << 30) + b_off / sizeof(tdb_off_t);
return TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE + b_off / sizeof(tdb_off_t);
}
int tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off,
......@@ -747,7 +727,7 @@ static int chainlock(struct tdb_context *tdb, const TDB_DATA *key,
int ret;
uint64_t h = tdb_hash(tdb, key->dptr, key->dsize);
ret = tdb_lock_list(tdb, h, ltype, waitflag) == TDB_OFF_ERR ? -1 : 0;
ret = tdb_lock_hashes(tdb, h, 1, ltype, waitflag);
tdb_trace_1rec(tdb, func, *key);
return ret;
}
......@@ -763,7 +743,7 @@ int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
{
uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
tdb_trace_1rec(tdb, "tdb_chainunlock", key);
return tdb_unlock_list(tdb, h, F_WRLCK);
return tdb_unlock_hashes(tdb, h, 1, F_WRLCK);
}
#if 0
......
......@@ -83,13 +83,36 @@ typedef uint64_t tdb_off_t;
/* Hash chain locks. */
#define TDB_HASH_LOCK_START 3
/* We start with 256 hash buckets, and a 64k-sized zone. */
#define INITIAL_HASH_BITS 8
/* Range for hash locks. */
#define TDB_HASH_LOCK_RANGE_BITS 30
#define TDB_HASH_LOCK_RANGE (1 << TDB_HASH_LOCK_RANGE_BITS)
/* We have 1024 entries in the top level. */
#define TDB_TOPLEVEL_HASH_BITS 10
/* And 64 entries in each sub-level: thus 64 bits exactly after 9 levels. */
#define TDB_SUBLEVEL_HASH_BITS 6
/* And 8 entries in each group, i.e. 8 groups per sublevel. */
#define TDB_HASH_GROUP_BITS 3
/* We start with a 64k-sized zone. */
#define INITIAL_ZONE_BITS 16
/* Try to create zones at least 32 times larger than allocations. */
#define TDB_COMFORT_FACTOR_BITS 5
/* We steal bits from the offsets to store hash info. */
#define TDB_OFF_HASH_GROUP_MASK ((1ULL << TDB_HASH_GROUP_BITS) - 1)
/* We steal this many upper bits, giving a maximum offset of 2^56 bytes (64 petabytes). */
#define TDB_OFF_UPPER_STEAL 8
#define TDB_OFF_UPPER_STEAL_EXTRA 7
#define TDB_OFF_UPPER_STEAL_TRUNCBIT 1
/* If this is set, hash is truncated (only 1 bit is valid). */
#define TDB_OFF_HASH_TRUNCATED_BIT 56
/* The bit number where we store next level of hash. */
#define TDB_OFF_HASH_EXTRA_BIT 57
/* Convenience mask to get actual offset. */
#define TDB_OFF_MASK \
(((1ULL << (64 - TDB_OFF_UPPER_STEAL)) - 1) - TDB_OFF_HASH_GROUP_MASK)
/* We ensure buckets up to size 1 << (zone_bits - TDB_COMFORT_FACTOR_BITS). */
/* FIXME: test this matches size_to_bucket! */
#define BUCKETS_FOR_ZONE(zone_bits) ((zone_bits) + 2 - TDB_COMFORT_FACTOR_BITS)
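
To make the stolen-bit layout concrete, a hedged decode sketch using only the constants defined above; the layout (home bucket in bits 0-2, file offset in bits 3-55, truncation flag in bit 56, 7 extra hash bits in bits 57-63) is inferred from these masks:

/* Sketch: pull the pieces back out of an encoded hash-bucket entry. */
static tdb_off_t entry_offset(tdb_off_t val)
{
	return val & TDB_OFF_MASK;		/* bits 3-55 */
}

static unsigned entry_home_bucket(tdb_off_t val)
{
	return val & TDB_OFF_HASH_GROUP_MASK;	/* bits 0-2 */
}

static unsigned entry_extra_hash(tdb_off_t val)
{
	return (val >> TDB_OFF_HASH_EXTRA_BIT)	/* bits 57-63 */
	       & ((1 << TDB_OFF_UPPER_STEAL_EXTRA) - 1);
}

static unsigned entry_hash_truncated(tdb_off_t val)
{
	return (val >> TDB_OFF_HASH_TRUNCATED_BIT) & 1;
}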
......@@ -173,34 +196,61 @@ static inline uint64_t frec_magic(const struct tdb_free_record *f)
return f->magic_and_meta & ~((1ULL << 6) - 1);
}
/* These parts can change while we have db open. */
struct tdb_header_volatile {
uint64_t generation; /* Makes sure it changes on every update. */
uint64_t hash_bits; /* Entries in hash table. */
uint64_t hash_off; /* Offset of hash table. */
/* Each zone has its set of free lists at the head.
*
* For each zone we have a series of per-size buckets, and a final bucket for
* "too big". */
struct free_zone_header {
/* How much does this zone cover? */
uint64_t zone_bits;
/* tdb_off_t buckets[free_buckets + 1] */
};
/* this is stored at the front of every database */
struct tdb_header {
char magic_food[32]; /* for /etc/magic */
char magic_food[64]; /* for /etc/magic */
/* FIXME: Make me 32 bit? */
uint64_t version; /* version of the code */
uint64_t hash_test; /* result of hashing HASH_MAGIC. */
uint64_t hash_seed; /* "random" seed written at creation time. */
struct tdb_header_volatile v;
tdb_off_t reserved[28];
tdb_off_t reserved[19];
/* Top level hash table. */
tdb_off_t hashtable[1ULL << TDB_TOPLEVEL_HASH_BITS];
};
/* Each zone has its set of free lists at the head.
*
* For each zone we have a series of per-size buckets, and a final bucket for
* "too big". */
struct free_zone_header {
/* How much does this zone cover? */
uint64_t zone_bits;
/* tdb_off_t buckets[free_buckets + 1] */
/* Information about a particular (locked) hash entry. */
struct hash_info {
/* Full hash value of entry. */
uint64_t h;
/* Start and length of lock acquired. */
tdb_off_t hlock_start;
tdb_len_t hlock_range;
/* Start of hash group. */
tdb_off_t group_start;
/* Bucket we belong in. */
unsigned int home_bucket;
/* Bucket we (or an empty space) were found in. */
unsigned int found_bucket;
/* How many bits of the hash are already used. */
unsigned int hash_used;
/* Current working group. */
tdb_off_t group[1 << TDB_HASH_GROUP_BITS];
};
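
A hedged sketch of the find-then-add flow this struct supports, pieced together from the prototypes declared below and the run-04-basichash test: find_and_lock fills in hash_info whether or not the key exists, so an insert can reuse the located, still-locked slot. Error paths (TDB_OFF_ERR, alloc failure, writing the key/data) are elided.

/* Sketch only: store a new record where find_and_lock located space. */
static int store_sketch(struct tdb_context *tdb,
			struct tdb_data key, struct tdb_data data)
{
	struct hash_info h;
	struct tdb_used_record rec;
	tdb_off_t off = find_and_lock(tdb, key, F_WRLCK, &h, &rec);

	if (off == 0) {		/* not found: h says where it would go */
		tdb_off_t new_off = alloc(tdb, key.dsize, data.dsize,
					  h.h, false);
		add_to_hash(tdb, &h, new_off);
		/* ...then write key and data just after the record header. */
	}
	return tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
}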
struct traverse_info {
struct traverse_level {
tdb_off_t hashtable;
const tdb_off_t *entries;
/* We ignore groups here, and treat the level as one big array. */
unsigned entry;
unsigned int total_buckets;
} levels[64 / TDB_SUBLEVEL_HASH_BITS];
unsigned int num_levels;
unsigned int toplevel_group;
/* This makes delete-everything-inside-traverse work as expected. */
tdb_off_t prev;
};
enum tdb_lock_flags {
......@@ -224,6 +274,9 @@ struct tdb_context {
/* Mmap (if any), or malloc (for TDB_INTERNAL). */
void *map_ptr;
/* Are we accessing directly? (debugging check). */
int direct_access;
/* Open file descriptor (undefined for TDB_INTERNAL). */
int fd;
......@@ -236,11 +289,6 @@ struct tdb_context {
/* Error code for last tdb error. */
enum TDB_ERROR ecode;
/* A cached copy of the header */
struct tdb_header header;
/* (for debugging). */
bool header_uptodate;
/* the flags passed to tdb_open, for tdb_reopen. */
uint32_t flags;
......@@ -251,6 +299,7 @@ struct tdb_context {
/* Hash function. */
tdb_hashfn_t khash;
void *hash_priv;
uint64_t hash_seed;
/* Set if we are in a transaction. */
struct tdb_transaction *transaction;
......@@ -284,19 +333,33 @@ struct tdb_methods {
/*
internal prototypes
*/
/* tdb.c: */
/* Returns true if header changed (and updates it). */
bool header_changed(struct tdb_context *tdb);
/* Commit header to disk. */
int write_header(struct tdb_context *tdb);
/* hash.c: */
void tdb_hash_init(struct tdb_context *tdb);
/* Hash random memory. */
uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len);
/* offset of hash table entry for this list/hash value */
tdb_off_t hash_off(struct tdb_context *tdb, uint64_t list);
/* Hash on disk. */
uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off);
/* Find and lock a hash entry (or where it would be). */
tdb_off_t find_and_lock(struct tdb_context *tdb,
struct tdb_data key,
int ltype,
struct hash_info *h,
struct tdb_used_record *rec);
int replace_in_hash(struct tdb_context *tdb,
struct hash_info *h,
tdb_off_t new_off);
int add_to_hash(struct tdb_context *tdb, struct hash_info *h,
tdb_off_t new_off);
int delete_from_hash(struct tdb_context *tdb, struct hash_info *h);
/* For tdb_check */
bool is_subhash(tdb_off_t val);
/* free.c: */
int tdb_zone_init(struct tdb_context *tdb);
......@@ -338,7 +401,13 @@ void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len);
/* Either alloc a copy, or give direct access. Release frees or noop. */
const void *tdb_access_read(struct tdb_context *tdb,
tdb_off_t off, tdb_len_t len, bool convert);
void *tdb_access_write(struct tdb_context *tdb,
tdb_off_t off, tdb_len_t len, bool convert);
/* Release result of tdb_access_read/write. */
void tdb_access_release(struct tdb_context *tdb, const void *p);
/* Commit result of tdb_access_write. */
int tdb_access_commit(struct tdb_context *tdb, void *p);
/* Convenience routine to get an offset. */
tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off);
......@@ -373,16 +442,17 @@ int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
void *rec, size_t len);
/* Hash on disk. */
uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off);
/* lock.c: */
void tdb_lock_init(struct tdb_context *tdb);
/* Lock/unlock a particular hash list. */
tdb_off_t tdb_lock_list(struct tdb_context *tdb, uint64_t hash,
int ltype, enum tdb_lock_flags waitflag);
int tdb_unlock_list(struct tdb_context *tdb, tdb_off_t list, int ltype);
/* Lock/unlock a range of hashes. */
int tdb_lock_hashes(struct tdb_context *tdb,
tdb_off_t hash_lock, tdb_len_t hash_range,
int ltype, enum tdb_lock_flags waitflag);
int tdb_unlock_hashes(struct tdb_context *tdb,
tdb_off_t hash_lock,
tdb_len_t hash_range, int ltype);
/* Lock/unlock a particular free bucket. */
int tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off,
......@@ -404,6 +474,16 @@ void tdb_unlock_open(struct tdb_context *tdb);
/* Serialize db expand. */
int tdb_lock_expand(struct tdb_context *tdb, int ltype);
void tdb_unlock_expand(struct tdb_context *tdb, int ltype);
bool tdb_has_expansion_lock(struct tdb_context *tdb);
/* traverse.c: */
int first_in_hash(struct tdb_context *tdb, int ltype,
struct traverse_info *tinfo,
TDB_DATA *kbuf, size_t *dlen);
int next_in_hash(struct tdb_context *tdb, int ltype,
struct traverse_info *tinfo,
TDB_DATA *kbuf, size_t *dlen);
#if 0
......
......@@ -10,7 +10,6 @@ struct tdb_layout *new_tdb_layout(void)
struct tdb_layout *layout = malloc(sizeof(*layout));
layout->num_elems = 0;
layout->elem = NULL;
layout->htable = -1;
return layout;
}
......@@ -63,19 +62,6 @@ void tdb_layout_add_used(struct tdb_layout *layout,
add(layout, elem);
}
void tdb_layout_add_hashtable(struct tdb_layout *layout,
unsigned int hash_bits,
tdb_len_t extra)
{
union tdb_layout_elem elem;
elem.base.type = HASHTABLE;
elem.hashtable.hash_bits = hash_bits;
elem.hashtable.extra = extra;
assert(layout->htable == -1U);
layout->htable = layout->num_elems;
add(layout, elem);
}
static tdb_len_t free_record_len(tdb_len_t len)
{
return sizeof(struct tdb_used_record) + len;
......@@ -93,7 +79,8 @@ static tdb_len_t data_record_len(struct tle_used *used)
static tdb_len_t hashtable_len(struct tle_hashtable *htable)
{
return sizeof(struct tdb_used_record)
+ (sizeof(tdb_off_t) << htable->hash_bits);
+ (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
+ htable->extra;
}
static tdb_len_t zone_header_len(struct tle_zone *zone)
......@@ -127,7 +114,7 @@ static void set_hashtable(void *mem, struct tdb_context *tdb,
struct tle_hashtable *htable)
{
struct tdb_used_record *u = mem;
tdb_len_t len = sizeof(tdb_off_t) << htable->hash_bits;
tdb_len_t len = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS;
set_header(tdb, u, 0, len, len + htable->extra, 0,
last_zone->zone_bits);
......@@ -151,17 +138,64 @@ static void add_to_freetable(struct tdb_context *tdb,
sizeof(struct tdb_used_record) + elen);
}
static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup)
{
return group_start
+ (ingroup % (1 << TDB_HASH_GROUP_BITS)) * sizeof(tdb_off_t);
}
/* Get bits from a value. */
static uint32_t bits(uint64_t val, unsigned start, unsigned num)
{
assert(num <= 32);
return (val >> start) & ((1U << num) - 1);
}
/* We take bits from the top: that way we can lock whole sections of the hash
* by using lock ranges. */
static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used)
{
*used += num;
return bits(h, 64 - *used, num);
}
static tdb_off_t encode_offset(tdb_off_t new_off, unsigned bucket,
uint64_t h)
{
return bucket
| new_off
| ((uint64_t)bits(h, 64 - TDB_OFF_UPPER_STEAL_EXTRA,
TDB_OFF_UPPER_STEAL_EXTRA)
<< TDB_OFF_HASH_EXTRA_BIT);
}
/* FIXME: Our hash table handling here is primitive: we don't expand! */
static void add_to_hashtable(struct tdb_context *tdb,
tdb_off_t eoff,
struct tdb_data key)
{
uint64_t hash = tdb_hash(tdb, key.dptr, key.dsize);
tdb_off_t hoff;
uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
tdb_off_t b_off, group_start;
unsigned i, group, in_group;
unsigned used = 0;
group = use_bits(h, TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS, &used);
in_group = use_bits(h, TDB_HASH_GROUP_BITS, &used);
group_start = offsetof(struct tdb_header, hashtable)
+ group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
while (tdb_read_off(tdb, hoff = hash_off(tdb, hash)) != 0)
hash++;
for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
unsigned bucket = (in_group + i) % (1 << TDB_HASH_GROUP_BITS);
tdb_write_off(tdb, hoff, eoff);
b_off = hbucket_off(group_start, bucket);
if (tdb_read_off(tdb, b_off) == 0) {
tdb_write_off(tdb, b_off,
encode_offset(eoff, bucket, h));
return;
}
}
abort();
}
/* FIXME: Support TDB_CONVERT */
......@@ -170,12 +204,10 @@ struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
unsigned int i;
tdb_off_t off, len;
tdb_len_t zone_left;
struct tdb_header *hdr;
char *mem;
struct tdb_context *tdb;
struct tle_zone *last_zone = NULL;
assert(layout->htable != -1U);
assert(layout->elem[0].base.type == ZONE);
zone_left = 0;
......@@ -221,18 +253,12 @@ struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
mem = malloc(off+1);
/* Now populate our header, cribbing from a real TDB header. */
tdb = tdb_open(NULL, TDB_INTERNAL, O_RDWR, 0, &tap_log_attr);
hdr = (void *)mem;
*hdr = tdb->header;
hdr->v.generation++;
hdr->v.hash_bits = layout->elem[layout->htable].hashtable.hash_bits;
hdr->v.hash_off = layout->elem[layout->htable].base.off
+ sizeof(struct tdb_used_record);
memcpy(mem, tdb->map_ptr, sizeof(struct tdb_header));
/* Mug the tdb we have to make it use this. */
free(tdb->map_ptr);
tdb->map_ptr = mem;
tdb->map_size = off+1;
header_changed(tdb);
for (i = 0; i < layout->num_elems; i++) {
union tdb_layout_elem *e = &layout->elem[i];
......
......@@ -10,9 +10,12 @@ void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len);
void tdb_layout_add_used(struct tdb_layout *layout,
TDB_DATA key, TDB_DATA data,
tdb_len_t extra);
#if 0 /* FIXME: Allow allocation of subtables */
void tdb_layout_add_hashtable(struct tdb_layout *layout,
unsigned int hash_bits,
int htable_parent, /* -1 == toplevel */
unsigned int bucket,
tdb_len_t extra);
#endif
struct tdb_context *tdb_layout_get(struct tdb_layout *layout);
enum layout_type {
......@@ -44,7 +47,8 @@ struct tle_used {
struct tle_hashtable {
struct tle_base base;
unsigned hash_bits;
int parent;
unsigned int bucket;
tdb_len_t extra;
};
......@@ -59,6 +63,5 @@ union tdb_layout_elem {
struct tdb_layout {
unsigned int num_elems;
union tdb_layout_elem *elem;
unsigned int htable;
};
#endif /* TDB2_TEST_LAYOUT_H */
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/io.c>
#include <ccan/tap/tap.h>
#include "logging.h"
......@@ -27,7 +28,7 @@ int main(int argc, char *argv[])
/* We should neatly encode all values. */
for (i = 0; i < 48; i++) {
uint64_t h = 1ULL << (i < 5 ? 63 - i : 63 - 4);
uint64_t h = 1ULL << (i < 5 ? i : 4);
uint64_t klen = 1ULL << (i < 16 ? i : 15);
uint64_t dlen = 1ULL << i;
uint64_t xlen = 1ULL << (i < 32 ? i : 31);
......@@ -38,7 +39,7 @@ int main(int argc, char *argv[])
ok1(rec_key_length(&rec) == klen);
ok1(rec_data_length(&rec) == dlen);
ok1(rec_extra_padding(&rec) == xlen);
ok1((uint64_t)rec_hash(&rec) << (64 - 5) == h);
ok1((uint64_t)rec_hash(&rec) == h);
ok1(rec_zone_bits(&rec) == zbits);
ok1(rec_magic(&rec) == TDB_MAGIC);
}
......
......@@ -2,6 +2,7 @@
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tap/tap.h>
static unsigned int dumb_fls(uint64_t num)
......
......@@ -2,6 +2,7 @@
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
......
......@@ -3,6 +3,7 @@
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tap/tap.h>
#include "logging.h"
......
......@@ -2,6 +2,7 @@
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
......@@ -38,67 +39,63 @@ int main(int argc, char *argv[])
/* No coalescing can be done due to EOF */
layout = new_tdb_layout();
tdb_layout_add_zone(layout, zone_bits, false);
tdb_layout_add_hashtable(layout, 12, 0);
tdb = tdb_layout_get(layout);
len = layout->elem[2].free.len;
len = layout->elem[1].free.len;
zone_off = layout->elem[0].base.off;
ok1(tdb_check(tdb, NULL, NULL) == 0);
ok1(free_record_length(tdb, layout->elem[2].base.off) == len);
ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
/* Figure out which bucket free entry is. */
b_off = bucket_off(zone_off, size_to_bucket(zone_bits, len));
/* Lock and fail to coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[2].base.off,
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off,
b_off, len) == 0);
tdb_unlock_free_bucket(tdb, b_off);
tdb_unlock_list(tdb, 0, F_WRLCK);
ok1(free_record_length(tdb, layout->elem[2].base.off) == len);
ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
/* No coalescing can be done due to used record */
layout = new_tdb_layout();
tdb_layout_add_zone(layout, zone_bits, false);
tdb_layout_add_hashtable(layout, 12, 0);
tdb_layout_add_free(layout, 1024);
tdb_layout_add_used(layout, key, data, 6);
tdb = tdb_layout_get(layout);
zone_off = layout->elem[0].base.off;
ok1(free_record_length(tdb, layout->elem[2].base.off) == 1024);
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Figure out which bucket free entry is. */
b_off = bucket_off(zone_off, size_to_bucket(zone_bits, 1024));
/* Lock and fail to coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[2].base.off,
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off,
b_off, 1024) == 0);
tdb_unlock_free_bucket(tdb, b_off);
ok1(free_record_length(tdb, layout->elem[2].base.off) == 1024);
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
/* Coalescing can be done due to two free records, then EOF */
layout = new_tdb_layout();
tdb_layout_add_zone(layout, zone_bits, false);
tdb_layout_add_hashtable(layout, 12, 0);
tdb_layout_add_free(layout, 1024);
tdb = tdb_layout_get(layout);
zone_off = layout->elem[0].base.off;
len = layout->elem[3].free.len;
ok1(free_record_length(tdb, layout->elem[2].base.off) == 1024);
ok1(free_record_length(tdb, layout->elem[3].base.off) == len);
len = layout->elem[2].free.len;
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(free_record_length(tdb, layout->elem[2].base.off) == len);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Figure out which bucket (first) free entry is. */
b_off = bucket_off(zone_off, size_to_bucket(zone_bits, 1024));
/* Lock and coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[2].base.off,
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off,
b_off, 1024) == 1);
ok1(!tdb_has_locks(tdb));
ok1(free_record_length(tdb, layout->elem[2].base.off)
ok1(free_record_length(tdb, layout->elem[1].base.off)
== 1024 + sizeof(struct tdb_used_record) + len);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
......@@ -106,24 +103,23 @@ int main(int argc, char *argv[])
/* Coalescing can be done due to two free records, then data */
layout = new_tdb_layout();
tdb_layout_add_zone(layout, zone_bits, false);
tdb_layout_add_hashtable(layout, 12, 0);
tdb_layout_add_free(layout, 1024);
tdb_layout_add_free(layout, 512);
tdb_layout_add_used(layout, key, data, 6);
tdb = tdb_layout_get(layout);
zone_off = layout->elem[0].base.off;
ok1(free_record_length(tdb, layout->elem[2].base.off) == 1024);
ok1(free_record_length(tdb, layout->elem[3].base.off) == 512);
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(free_record_length(tdb, layout->elem[2].base.off) == 512);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Figure out which bucket free entry is. */
b_off = bucket_off(zone_off, size_to_bucket(zone_bits, 1024));
/* Lock and coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[2].base.off,
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off,
b_off, 1024) == 1);
ok1(!tdb_has_locks(tdb));
ok1(free_record_length(tdb, layout->elem[2].base.off)
ok1(free_record_length(tdb, layout->elem[1].base.off)
== 1024 + sizeof(struct tdb_used_record) + 512);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
......@@ -131,25 +127,24 @@ int main(int argc, char *argv[])
/* Coalescing can be done due to three free records, then EOF */
layout = new_tdb_layout();
tdb_layout_add_zone(layout, zone_bits, false);
tdb_layout_add_hashtable(layout, 12, 0);
tdb_layout_add_free(layout, 1024);
tdb_layout_add_free(layout, 512);
tdb = tdb_layout_get(layout);
zone_off = layout->elem[0].base.off;
len = layout->elem[4].free.len;
ok1(free_record_length(tdb, layout->elem[2].base.off) == 1024);
ok1(free_record_length(tdb, layout->elem[3].base.off) == 512);
ok1(free_record_length(tdb, layout->elem[4].base.off) == len);
len = layout->elem[3].free.len;
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(free_record_length(tdb, layout->elem[2].base.off) == 512);
ok1(free_record_length(tdb, layout->elem[3].base.off) == len);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Figure out which bucket free entry is. */
b_off = bucket_off(zone_off, size_to_bucket(zone_bits, 1024));
/* Lock and coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[2].base.off,
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off,
b_off, 1024) == 1);
ok1(!tdb_has_locks(tdb));
ok1(free_record_length(tdb, layout->elem[2].base.off)
ok1(free_record_length(tdb, layout->elem[1].base.off)
== 1024 + sizeof(struct tdb_used_record) + 512
+ sizeof(struct tdb_used_record) + len);
ok1(tdb_check(tdb, NULL, NULL) == 0);
......@@ -158,23 +153,22 @@ int main(int argc, char *argv[])
/* Coalescing across two zones isn't possible. */
layout = new_tdb_layout();
tdb_layout_add_zone(layout, zone_bits, false);
tdb_layout_add_hashtable(layout, 12, 0);
tdb_layout_add_zone(layout, zone_bits, true);
tdb = tdb_layout_get(layout);
zone_off = layout->elem[0].base.off;
len = layout->elem[2].free.len;
ok1(free_record_length(tdb, layout->elem[2].base.off) == len);
len = layout->elem[1].free.len;
ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Figure out which list free entry is. */
b_off = bucket_off(zone_off, size_to_bucket(zone_bits, len));
/* Lock and coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[2].base.off,
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off,
b_off, len) == 0);
tdb_unlock_free_bucket(tdb, b_off);
ok1(!tdb_has_locks(tdb));
ok1(free_record_length(tdb, layout->elem[2].base.off) == len);
ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
......
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/* We rig the hash so adjacent-numbered records always clash. */
static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv)
{
return ((uint64_t)*(unsigned int *)key)
<< (64 - TDB_TOPLEVEL_HASH_BITS - 1);
}
int main(int argc, char *argv[])
{
unsigned int i, j;
struct tdb_context *tdb;
unsigned int v;
struct tdb_used_record rec;
struct tdb_data key = { (unsigned char *)&v, sizeof(v) };
struct tdb_data dbuf = { (unsigned char *)&v, sizeof(v) };
union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.hash_fn = clash } };
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT,
};
hattr.base.next = &tap_log_attr;
plan_tests(sizeof(flags) / sizeof(flags[0])
* (91 + (2 * ((1 << TDB_HASH_GROUP_BITS) - 1))) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
struct hash_info h;
tdb_off_t new_off, off, subhash;
tdb = tdb_open("run-04-basichash.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
ok1(tdb);
if (!tdb)
continue;
v = 0;
/* Should not find it. */
ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec) == 0);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in group 0, bucket 0. */
ok1(h.group_start == offsetof(struct tdb_header, hashtable));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 0);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
/* Should have lock on bucket 0 */
ok1(h.hlock_start == 0);
ok1(h.hlock_range ==
1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
ok1((tdb->flags & TDB_NOLOCK) || tdb->num_lockrecs == 1);
ok1((tdb->flags & TDB_NOLOCK)
|| tdb->lockrecs[0].off == TDB_HASH_LOCK_START);
/* FIXME: Check lock length */
/* Allocate a new record. */
new_off = alloc(tdb, key.dsize, dbuf.dsize, h.h, false);
ok1(new_off != TDB_OFF_ERR);
/* We should be able to add it now. */
ok1(add_to_hash(tdb, &h, new_off) == 0);
/* Make sure we fill it in for later finding. */
off = new_off + sizeof(struct tdb_used_record);
ok1(!tdb->methods->write(tdb, off, key.dptr, key.dsize));
off += key.dsize;
ok1(!tdb->methods->write(tdb, off, dbuf.dptr, dbuf.dsize));
/* We should be able to unlock that OK. */
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_WRLCK) == 0);
/* Database should be consistent. */
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Now, this should give a successful lookup. */
ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec) == new_off);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in group 0, bucket 0. */
ok1(h.group_start == offsetof(struct tdb_header, hashtable));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 0);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
/* Should have lock on bucket 0 */
ok1(h.hlock_start == 0);
ok1(h.hlock_range ==
1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
ok1((tdb->flags & TDB_NOLOCK) || tdb->num_lockrecs == 1);
ok1((tdb->flags & TDB_NOLOCK)
|| tdb->lockrecs[0].off == TDB_HASH_LOCK_START);
/* FIXME: Check lock length */
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_WRLCK) == 0);
/* Database should be consistent. */
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Test expansion. */
v = 1;
ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec) == 0);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in group 0, bucket 1. */
ok1(h.group_start == offsetof(struct tdb_header, hashtable));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 1);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
/* Should have lock on bucket 0 */
ok1(h.hlock_start == 0);
ok1(h.hlock_range ==
1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
ok1((tdb->flags & TDB_NOLOCK) || tdb->num_lockrecs == 1);
ok1((tdb->flags & TDB_NOLOCK)
|| tdb->lockrecs[0].off == TDB_HASH_LOCK_START);
/* FIXME: Check lock length */
/* Make it expand 0'th bucket. */
ok1(expand_group(tdb, &h) == 0);
/* First one should be subhash, next should be empty. */
ok1(is_subhash(h.group[0]));
subhash = (h.group[0] & TDB_OFF_MASK);
for (j = 1; j < (1 << TDB_HASH_GROUP_BITS); j++)
ok1(h.group[j] == 0);
ok1(tdb_write_convert(tdb, h.group_start,
h.group, sizeof(h.group)) == 0);
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_WRLCK) == 0);
/* Should be happy with expansion. */
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Should be able to find it. */
v = 0;
ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec) == new_off);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in expanded group 0, bucket 0. */
ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 0);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
+ TDB_SUBLEVEL_HASH_BITS);
/* Should have lock on bucket 0 */
ok1(h.hlock_start == 0);
ok1(h.hlock_range ==
1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
ok1((tdb->flags & TDB_NOLOCK) || tdb->num_lockrecs == 1);
ok1((tdb->flags & TDB_NOLOCK)
|| tdb->lockrecs[0].off == TDB_HASH_LOCK_START);
/* FIXME: Check lock length */
/* Simple delete should work. */
ok1(delete_from_hash(tdb, &h) == 0);
ok1(add_free_record(tdb, rec_zone_bits(&rec), new_off,
sizeof(struct tdb_used_record)
+ rec_key_length(&rec)
+ rec_data_length(&rec)
+ rec_extra_padding(&rec)) == 0);
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_WRLCK) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Test second-level expansion: should expand 0th bucket. */
v = 0;
ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec) == 0);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in group 0, bucket 0. */
ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 0);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS+TDB_SUBLEVEL_HASH_BITS);
/* Should have lock on bucket 0 */
ok1(h.hlock_start == 0);
ok1(h.hlock_range ==
1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
ok1((tdb->flags & TDB_NOLOCK) || tdb->num_lockrecs == 1);
ok1((tdb->flags & TDB_NOLOCK)
|| tdb->lockrecs[0].off == TDB_HASH_LOCK_START);
/* FIXME: Check lock length */
ok1(expand_group(tdb, &h) == 0);
/* First one should be subhash, next should be empty. */
ok1(is_subhash(h.group[0]));
subhash = (h.group[0] & TDB_OFF_MASK);
for (j = 1; j < (1 << TDB_HASH_GROUP_BITS); j++)
ok1(h.group[j] == 0);
ok1(tdb_write_convert(tdb, h.group_start,
h.group, sizeof(h.group)) == 0);
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_WRLCK) == 0);
/* Should be happy with expansion. */
ok1(tdb_check(tdb, NULL, NULL) == 0);
ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec) == 0);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in group 0, bucket 0. */
ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 0);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
+ TDB_SUBLEVEL_HASH_BITS * 2);
/* We should be able to add it now. */
/* Allocate a new record. */
new_off = alloc(tdb, key.dsize, dbuf.dsize, h.h, false);
ok1(new_off != TDB_OFF_ERR);
ok1(add_to_hash(tdb, &h, new_off) == 0);
/* Make sure we fill it in for later finding. */
off = new_off + sizeof(struct tdb_used_record);
ok1(!tdb->methods->write(tdb, off, key.dptr, key.dsize));
off += key.dsize;
ok1(!tdb->methods->write(tdb, off, dbuf.dptr, dbuf.dsize));
/* We should be able to unlock that OK. */
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_WRLCK) == 0);
/* Database should be consistent. */
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Should be able to find it. */
v = 0;
ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec) == new_off);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in expanded group 0, bucket 0. */
ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 0);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
+ TDB_SUBLEVEL_HASH_BITS * 2);
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}
......@@ -2,6 +2,7 @@
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
......@@ -18,7 +19,7 @@ int main(int argc, char *argv[])
plan_tests(sizeof(flags) / sizeof(flags[0]) * 9 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-simple-store.tdb", flags[i],
tdb = tdb_open("run-10-simple-store.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
if (tdb) {
......
......@@ -2,6 +2,7 @@
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
......@@ -18,7 +19,7 @@ int main(int argc, char *argv[])
plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-simple-fetch.tdb", flags[i],
tdb = tdb_open("run-11-simple-fetch.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
if (tdb) {
......
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/* We use the same seed which we saw a failure on. */
static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
{
return hash64_stable((const unsigned char *)key, len,
*(uint64_t *)p);
}
static bool equal(struct tdb_data a, struct tdb_data b)
{
if (a.dsize != b.dsize)
return false;
return memcmp(a.dptr, b.dptr, a.dsize) == 0;
}
int main(int argc, char *argv[])
{
unsigned int i, j;
struct tdb_context *tdb;
uint64_t seed = 16014841315512641303ULL;
union tdb_attribute fixed_hattr
= { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.hash_fn = fixedhash,
.hash_private = &seed } };
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
struct tdb_data data = { (unsigned char *)&j, sizeof(j) };
fixed_hattr.base.next = &tap_log_attr;
plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 500 * 2) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-12-store.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr);
ok1(tdb);
if (!tdb)
continue;
/* We seemed to lose some keys.
* Insert and check they're in there! */
for (j = 0; j < 500; j++) {
ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
ok1(equal(tdb_fetch(tdb, key), data));
}
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}
......@@ -2,6 +2,7 @@
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
......@@ -9,12 +10,35 @@
/* We rig the hash so adjacent-numbered records always clash. */
static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv)
{
return *(unsigned int *)key / 2;
return ((uint64_t)*(unsigned int *)key)
<< (64 - TDB_TOPLEVEL_HASH_BITS - 1);
}
static void test_val(struct tdb_context *tdb, unsigned int val)
/* We use the same seed which we saw a failure on. */
static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
{
unsigned int v;
return hash64_stable((const unsigned char *)key, len,
*(uint64_t *)p);
}
static bool store_records(struct tdb_context *tdb)
{
int i;
struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
for (i = 0; i < 1000; i++) {
if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
return false;
if (tdb_fetch(tdb, key).dsize != data.dsize)
return false;
}
return true;
}
static void test_val(struct tdb_context *tdb, uint64_t val)
{
uint64_t v;
struct tdb_data key = { (unsigned char *)&v, sizeof(v) };
struct tdb_data data = { (unsigned char *)&v, sizeof(v) };
......@@ -98,20 +122,30 @@ static void test_val(struct tdb_context *tdb, unsigned int val)
int main(int argc, char *argv[])
{
unsigned int i;
unsigned int i, j;
struct tdb_context *tdb;
union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.hash_fn = clash } };
uint64_t seed = 16014841315512641303ULL;
union tdb_attribute clash_hattr
= { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.hash_fn = clash } };
union tdb_attribute fixed_hattr
= { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.hash_fn = fixedhash,
.hash_private = &seed } };
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
/* These two values gave trouble before. */
int vals[] = { 755, 837 };
hattr.base.next = &tap_log_attr;
clash_hattr.base.next = &tap_log_attr;
fixed_hattr.base.next = &tap_log_attr;
plan_tests(sizeof(flags) / sizeof(flags[0]) * 66 + 1);
plan_tests(sizeof(flags) / sizeof(flags[0])
* (32 * 3 + 5 + sizeof(vals)/sizeof(vals[0])*2) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-delete.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
tdb = tdb_open("run-13-delete.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &clash_hattr);
ok1(tdb);
if (!tdb)
continue;
......@@ -119,11 +153,33 @@ int main(int argc, char *argv[])
/* Check start of hash table. */
test_val(tdb, 0);
/* Check end of hash table (will wrap around!). */
test_val(tdb, ((1 << tdb->header.v.hash_bits) - 1) * 2);
/* Check end of hash table. */
test_val(tdb, -1ULL);
/* Check mixed bitpattern. */
test_val(tdb, 0x123456789ABCDEF0ULL);
ok1(!tdb_has_locks(tdb));
tdb_close(tdb);
/* Deleting these entries in the db gave problems. */
tdb = tdb_open("run-13-delete.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr);
ok1(tdb);
if (!tdb)
continue;
ok1(store_records(tdb));
ok1(tdb_check(tdb, NULL, NULL) == 0);
for (j = 0; j < sizeof(vals)/sizeof(vals[0]); j++) {
struct tdb_data key;
key.dptr = (unsigned char *)&vals[j];
key.dsize = sizeof(vals[j]);
ok1(tdb_delete(tdb, key) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
}
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
......
......@@ -2,6 +2,7 @@
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
......
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
static uint64_t myhash(const void *key, size_t len, uint64_t seed, void *priv)
{
return *(uint64_t *)key;
}
static void add_bits(uint64_t *val, unsigned new, unsigned new_bits,
unsigned *done)
{
*done += new_bits;
*val |= ((uint64_t)new << (64 - *done));
}
static uint64_t make_key(unsigned topgroup, unsigned topbucket,
unsigned subgroup1, unsigned subbucket1,
unsigned subgroup2, unsigned subbucket2)
{
uint64_t key = 0;
unsigned done = 0;
add_bits(&key, topgroup, TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS,
&done);
add_bits(&key, topbucket, TDB_HASH_GROUP_BITS, &done);
add_bits(&key, subgroup1, TDB_SUBLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS,
&done);
add_bits(&key, subbucket1, TDB_HASH_GROUP_BITS, &done);
add_bits(&key, subgroup2, TDB_SUBLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS,
&done);
add_bits(&key, subbucket2, TDB_HASH_GROUP_BITS, &done);
return key;
}
int main(int argc, char *argv[])
{
unsigned int i, j;
struct tdb_context *tdb;
uint64_t kdata;
struct tdb_used_record rec;
struct tdb_data key = { (unsigned char *)&kdata, sizeof(kdata) };
struct tdb_data dbuf = { (unsigned char *)&kdata, sizeof(kdata) };
union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.hash_fn = myhash } };
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT,
};
hattr.base.next = &tap_log_attr;
plan_tests(sizeof(flags) / sizeof(flags[0])
* (9 + (20 + 2 * ((1 << TDB_HASH_GROUP_BITS) - 2))
* (1 << TDB_HASH_GROUP_BITS)) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
struct hash_info h;
tdb = tdb_open("run-04-basichash.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
ok1(tdb);
if (!tdb)
continue;
/* Fill a group. */
for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
kdata = make_key(0, j, 0, 0, 0, 0);
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
}
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Check first still exists. */
kdata = make_key(0, 0, 0, 0, 0, 0);
ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec) != 0);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in group 0, bucket 0. */
ok1(h.group_start == offsetof(struct tdb_header, hashtable));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 0);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
/* Entire group should be full! */
for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++)
ok1(h.group[j] != 0);
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_RDLCK) == 0);
/* Now, add one more to each should expand (that) bucket. */
for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
unsigned int k;
kdata = make_key(0, j, 0, 1, 0, 0);
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec) != 0);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have moved to subhash */
ok1(h.group_start >= sizeof(struct tdb_header));
ok1(h.home_bucket == 1);
ok1(h.found_bucket == 1);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
+ TDB_SUBLEVEL_HASH_BITS);
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_RDLCK) == 0);
/* Keep adding, make it expand again. */
for (k = 2; k < (1 << TDB_HASH_GROUP_BITS); k++) {
kdata = make_key(0, j, 0, k, 0, 0);
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
}
/* This should tip it over to sub-sub-hash. */
kdata = make_key(0, j, 0, 0, 0, 1);
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec) != 0);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have moved to subhash */
ok1(h.group_start >= sizeof(struct tdb_header));
ok1(h.home_bucket == 1);
ok1(h.found_bucket == 1);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
+ TDB_SUBLEVEL_HASH_BITS + TDB_SUBLEVEL_HASH_BITS);
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_RDLCK) == 0);
}
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/* We rig the hash so adjacent-numbered records always clash. */
static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv)
{
return *(unsigned int *)key / 2;
}
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
unsigned int v;
struct tdb_data key = { (unsigned char *)&v, sizeof(v) };
struct tdb_data data = { (unsigned char *)&v, sizeof(v) };
union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.hash_fn = clash } };
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
hattr.base.next = &tap_log_attr;
plan_tests(sizeof(flags) / sizeof(flags[0]) * 11 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-enlarge-hash.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
ok1(tdb);
if (!tdb)
continue;
/* Put a single entry in. */
v = 0;
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
enlarge_hash(tdb);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Put a non-clashing entry in. */
v = 2;
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
enlarge_hash(tdb);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Now, make a clash. */
v = 1;
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
enlarge_hash(tdb);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Clash at end. */
v = ((1 << tdb->header.v.hash_bits) - 1) * 2;
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
v++;
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
enlarge_hash(tdb);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/* We rig the hash so adjacent-numbered records always clash. */
static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv)
{
return *(unsigned int *)key / 2;
}
static void test_val(struct tdb_context *tdb, unsigned int val)
{
unsigned int v;
struct tdb_data key = { (unsigned char *)&v, sizeof(v) };
struct tdb_data data = { (unsigned char *)&v, sizeof(v) };
/* Insert two entries, with the same hash. */
v = val;
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
v = val + 1;
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Can find both? */
v = val;
ok1(tdb_fetch(tdb, key).dsize == data.dsize);
v = val + 1;
ok1(tdb_fetch(tdb, key).dsize == data.dsize);
}
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.hash_fn = clash } };
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT,
};
hattr.base.next = &tap_log_attr;
plan_tests(sizeof(flags) / sizeof(flags[0]) * 14 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-hashclash.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
ok1(tdb);
if (!tdb)
continue;
/* Check start of hash table. */
test_val(tdb, 0);
ok1(!tdb_has_locks(tdb));
tdb_close(tdb);
tdb = tdb_open("run-hashclash.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
ok1(tdb);
if (!tdb)
continue;
/* Check end of hash table (will wrap around!). */
test_val(tdb, ((1 << tdb->header.v.hash_bits) - 1) * 2);
ok1(!tdb_has_locks(tdb));
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}
......@@ -3,6 +3,7 @@
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tap/tap.h>
......
......@@ -2,6 +2,7 @@
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
......
......@@ -2,6 +2,7 @@
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
......
......@@ -2,6 +2,7 @@
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tap/tap.h>
......@@ -54,6 +55,7 @@ static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *p)
td->high = val;
if (td->delete) {
if (tdb_delete(tdb, key) != 0) {
td->delete_error = tdb_error(tdb);
return -1;
......@@ -208,7 +210,7 @@ int main(int argc, char *argv[])
ok1(td.low == td.high);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Deleting traverse. */
/* Deleting traverse (delete everything). */
td.calls = 0;
td.call_limit = UINT_MAX;
td.low = INT_MAX;
......@@ -222,7 +224,7 @@ int main(int argc, char *argv[])
ok1(!td.mismatch);
ok1(td.calls == NUM_RECORDS);
ok1(td.low == 0);
ok1(td.high == NUM_RECORDS-1);
ok1(td.high == NUM_RECORDS - 1);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Now it's empty! */
......
......@@ -21,91 +21,24 @@
static int64_t traverse(struct tdb_context *tdb, int ltype,
tdb_traverse_func fn, void *p)
{
uint64_t i, num, count = 0;
tdb_off_t off, prev_bucket;
struct tdb_used_record rec;
int ret;
struct traverse_info tinfo;
struct tdb_data k, d;
bool finish = false;
int64_t count = 0;
/* FIXME: Do we need to start at 0? */
prev_bucket = tdb_lock_list(tdb, 0, ltype, TDB_LOCK_WAIT);
if (prev_bucket != 0)
return -1;
num = (1ULL << tdb->header.v.hash_bits);
for (i = tdb_find_nonzero_off(tdb, hash_off(tdb, 0), num);
i != num && !finish;
i += tdb_find_nonzero_off(tdb, hash_off(tdb, i), num - i)) {
if (tdb_lock_list(tdb, i, ltype, TDB_LOCK_WAIT) != i)
goto fail;
off = tdb_read_off(tdb, hash_off(tdb, i));
if (off == TDB_OFF_ERR) {
tdb_unlock_list(tdb, i, ltype);
goto fail;
}
/* This race can happen, but look again. */
if (off == 0) {
tdb_unlock_list(tdb, i, ltype);
continue;
}
/* Drop previous lock. */
tdb_unlock_list(tdb, prev_bucket, ltype);
prev_bucket = i;
if (tdb_read_convert(tdb, off, &rec, sizeof(rec)) != 0)
goto fail;
k.dsize = rec_key_length(&rec);
d.dsize = rec_data_length(&rec);
if (ltype == F_RDLCK) {
/* Read traverses can keep the lock. */
k.dptr = (void *)tdb_access_read(tdb,
off + sizeof(rec),
k.dsize + d.dsize,
false);
} else {
k.dptr = tdb_alloc_read(tdb, off + sizeof(rec),
k.dsize + d.dsize);
}
if (!k.dptr)
goto fail;
for (ret = first_in_hash(tdb, ltype, &tinfo, &k, &d.dsize);
ret == 1;
ret = next_in_hash(tdb, ltype, &tinfo, &k, &d.dsize)) {
d.dptr = k.dptr + k.dsize;
count++;
if (ltype == F_WRLCK) {
/* Drop lock before calling out. */
tdb_unlock_list(tdb, i, ltype);
}
if (fn && fn(tdb, k, d, p))
finish = true;
if (ltype == F_WRLCK) {
free(k.dptr);
/* Regain lock. FIXME: Is this necessary? */
if (tdb_lock_list(tdb, i, ltype, TDB_LOCK_WAIT) != i)
return -1;
/* This makes deleting under ourselves a bit nicer. */
if (tdb_read_off(tdb, hash_off(tdb, i)) == off)
i++;
} else {
tdb_access_release(tdb, k.dptr);
i++;
}
break;
}
/* Drop final lock. */
tdb_unlock_list(tdb, prev_bucket, ltype);
if (ret < 0)
return -1;
return count;
fail:
tdb_unlock_list(tdb, prev_bucket, ltype);
return -1;
}
int64_t tdb_traverse(struct tdb_context *tdb, tdb_traverse_func fn, void *p)
......
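
For completeness, a hedged usage sketch of the callback interface this loop serves; the signature follows tdb_traverse_func as used in the tests, and a non-zero return stops the walk early, as the fn() check in the removed code shows. Illustrative only.

/* Sketch: count records via tdb_traverse. */
static int count_cb(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
		    void *p)
{
	(*(int64_t *)p)++;
	return 0;	/* non-zero would end the traverse early */
}

/* Usage:
 *	int64_t n = 0;
 *	if (tdb_traverse(tdb, count_cb, &n) < 0)
 *		...error...
 */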