Commit d70577b6 authored by Rusty Russell's avatar Rusty Russell

tdb2: get rid of zones

Zones were a bad idea.  They meant we couldn't simply add stuff to the end
of the file (which transactions relied upon), and there was no good heuristic
for sizing them.

This patch reverts us to a single free table.
parent 2ecf943a
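
For orientation, here is a minimal, self-contained sketch of the layout this
commit moves to: a single free table (struct tdb_freelist) reachable via the
new free_list offset stored in the header, holding one array of per-size
bucket heads instead of a bucket array at the start of every zone.  The
TDB_FREE_BUCKETS value and the second field name of tdb_used_record are
illustrative assumptions; only the bucket_off() arithmetic mirrors the code
in this commit.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t tdb_off_t;

/* Illustrative only; the real definition is 64 - TDB_OFF_UPPER_STEAL. */
#define TDB_FREE_BUCKETS 60

/* Two 64-bit words, as in the tdb2 used-record header. */
struct tdb_used_record {
	uint64_t magic_and_meta;
	uint64_t key_and_data_len;	/* assumed field name */
};

/* The single free table: an ordinary used record whose data is the buckets. */
struct tdb_freelist {
	struct tdb_used_record hdr;
	tdb_off_t buckets[TDB_FREE_BUCKETS];
};

/* Offset of a given bucket head, as in the new bucket_off(). */
static tdb_off_t bucket_off(tdb_off_t flist_off, unsigned bucket)
{
	return flist_off + offsetof(struct tdb_freelist, buckets)
		+ bucket * sizeof(tdb_off_t);
}

int main(void)
{
	/* Hypothetical free-list offset, e.g. just past the header. */
	tdb_off_t flist_off = 8192;

	printf("bucket 0 at %llu, bucket 5 at %llu\n",
	       (unsigned long long)bucket_off(flist_off, 0),
	       (unsigned long long)bucket_off(flist_off, 5));
	return 0;
}
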
......@@ -237,7 +237,7 @@ static bool check_hash_tree(struct tdb_context *tdb,
goto fail;
/* Bottom bits must match header. */
if ((h & ((1 << 5)-1)) != rec_hash(&rec)) {
if ((h & ((1 << 11)-1)) != rec_hash(&rec)) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"tdb_check: Bad hash magic at"
" offset %llu (0x%llx vs 0x%llx)\n",
......@@ -267,7 +267,8 @@ static bool check_hash(struct tdb_context *tdb,
0, 0, used, num_used, &num_found))
return false;
if (num_found != num_used) {
/* 1 is for the free list. */
if (num_found != num_used - 1) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"tdb_check: Not all entries are in hash\n");
return false;
......@@ -278,8 +279,7 @@ static bool check_hash(struct tdb_context *tdb,
static bool check_free(struct tdb_context *tdb,
tdb_off_t off,
const struct tdb_free_record *frec,
tdb_off_t prev,
tdb_off_t zone_off, unsigned int bucket)
tdb_off_t prev, unsigned int bucket)
{
if (frec_magic(frec) != TDB_FREE_MAGIC) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
......@@ -291,20 +291,11 @@ static bool check_free(struct tdb_context *tdb,
+ frec->data_len+sizeof(struct tdb_used_record),
false))
return false;
if (off < zone_off || off >= zone_off + (1ULL<<frec_zone_bits(frec))) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"tdb_check: offset %llu outside zone %llu-%llu\n",
(long long)off,
(long long)zone_off,
(long long)zone_off + (1ULL<<frec_zone_bits(frec)));
return false;
}
if (size_to_bucket(frec_zone_bits(frec), frec->data_len) != bucket) {
if (size_to_bucket(frec->data_len) != bucket) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"tdb_check: offset %llu in wrong bucket %u vs %u\n",
(long long)off,
bucket,
size_to_bucket(frec_zone_bits(frec), frec->data_len));
bucket, size_to_bucket(frec->data_len));
return false;
}
if (prev != frec->prev) {
......@@ -317,31 +308,41 @@ static bool check_free(struct tdb_context *tdb,
return true;
}
static tdb_len_t check_free_list(struct tdb_context *tdb,
tdb_off_t zone_off,
static bool check_free_list(struct tdb_context *tdb,
tdb_off_t flist_off,
tdb_off_t free[],
size_t num_free,
size_t *num_found)
{
struct free_zone_header zhdr;
struct tdb_freelist flist;
tdb_off_t h;
unsigned int i;
if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1)
return TDB_OFF_ERR;
if (tdb_read_convert(tdb, flist_off, &flist, sizeof(flist)) == -1)
return false;
if (rec_magic(&flist.hdr) != TDB_MAGIC
|| rec_key_length(&flist.hdr) != 0
|| rec_data_length(&flist.hdr) != sizeof(flist) - sizeof(flist.hdr)
|| rec_hash(&flist.hdr) != 1) {
tdb->log(tdb, TDB_DEBUG_ERROR,
tdb->log_priv,
"tdb_check: Invalid header on free list\n");
return false;
}
for (i = 0; i <= BUCKETS_FOR_ZONE(zhdr.zone_bits); i++) {
for (i = 0; i < TDB_FREE_BUCKETS; i++) {
tdb_off_t off, prev = 0, *p;
struct tdb_free_record f;
h = bucket_off(zone_off, i);
h = bucket_off(flist_off, i);
for (off = tdb_read_off(tdb, h); off; off = f.next) {
if (off == TDB_OFF_ERR)
return TDB_OFF_ERR;
return false;
if (tdb_read_convert(tdb, off, &f, sizeof(f)))
return TDB_OFF_ERR;
if (!check_free(tdb, off, &f, prev, zone_off, i))
return TDB_OFF_ERR;
return false;
if (!check_free(tdb, off, &f, prev, i))
return false;
/* FIXME: Check hash bits */
p = asearch(&off, free, num_free, off_cmp);
......@@ -351,7 +352,7 @@ static tdb_len_t check_free_list(struct tdb_context *tdb,
"tdb_check: Invalid offset"
" %llu in free table\n",
(long long)off);
return TDB_OFF_ERR;
return false;
}
/* Mark it invalid. */
*p ^= 1;
......@@ -359,79 +360,38 @@ static tdb_len_t check_free_list(struct tdb_context *tdb,
prev = off;
}
}
return 1ULL << zhdr.zone_bits;
return true;
}
static tdb_off_t check_zone(struct tdb_context *tdb, tdb_off_t zone_off,
static bool check_linear(struct tdb_context *tdb,
tdb_off_t **used, size_t *num_used,
tdb_off_t **free, size_t *num_free,
unsigned int *max_zone_bits)
tdb_off_t **free, size_t *num_free)
{
struct free_zone_header zhdr;
tdb_off_t off, hdrlen, end;
tdb_off_t off;
tdb_len_t len;
if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1)
return TDB_OFF_ERR;
if (zhdr.zone_bits < INITIAL_ZONE_BITS) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"check: bad zone_bits %llu at zone %llu\n",
(long long)zhdr.zone_bits, (long long)zone_off);
return TDB_OFF_ERR;
}
/* Zone bits can only increase... */
if (zhdr.zone_bits > *max_zone_bits)
*max_zone_bits = zhdr.zone_bits;
else if (zhdr.zone_bits < *max_zone_bits) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"check: small zone_bits %llu at zone %llu\n",
(long long)zhdr.zone_bits, (long long)zone_off);
return TDB_OFF_ERR;
}
/* Zone header must be within file! */
hdrlen = sizeof(zhdr)
+ (BUCKETS_FOR_ZONE(zhdr.zone_bits) + 1) * sizeof(tdb_off_t);
if (tdb->methods->oob(tdb, zone_off + hdrlen, true))
return TDB_OFF_ERR;
end = zone_off + (1ULL << zhdr.zone_bits);
if (end > tdb->map_size)
end = tdb->map_size;
for (off = zone_off + hdrlen; off < end; off += len) {
for (off = sizeof(struct tdb_header); off < tdb->map_size; off += len) {
union {
struct tdb_used_record u;
struct tdb_free_record f;
} pad, *p;
p = tdb_get(tdb, off, &pad, sizeof(pad));
if (!p)
return TDB_OFF_ERR;
return false;
if (frec_magic(&p->f) == TDB_FREE_MAGIC
|| frec_magic(&p->f) == TDB_COALESCING_MAGIC) {
if (frec_zone_bits(&p->f) != zhdr.zone_bits) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"tdb_check: Bad free zone bits %u"
" at offset %llu\n",
frec_zone_bits(&p->f),
(long long)off);
return TDB_OFF_ERR;
}
len = sizeof(p->u) + p->f.data_len;
if (off + len > zone_off + (1ULL << zhdr.zone_bits)) {
if (off + len > tdb->map_size) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"tdb_check: free overlength %llu"
" at offset %llu\n",
(long long)len, (long long)off);
return TDB_OFF_ERR;
return false;
}
/* This record is free! */
if (frec_magic(&p->f) == TDB_FREE_MAGIC
&& !append(free, num_free, off))
return TDB_OFF_ERR;
return false;
} else {
uint64_t klen, dlen, extra;
......@@ -442,32 +402,23 @@ static tdb_off_t check_zone(struct tdb_context *tdb, tdb_off_t zone_off,
" at offset %llu\n",
(long long)rec_magic(&p->u),
(long long)off);
return TDB_OFF_ERR;
}
if (rec_zone_bits(&p->u) != zhdr.zone_bits) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"tdb_check: Bad zone bits %u"
" at offset %llu\n",
rec_zone_bits(&p->u),
(long long)off);
return TDB_OFF_ERR;
return false;
}
if (!append(used, num_used, off))
return TDB_OFF_ERR;
return false;
klen = rec_key_length(&p->u);
dlen = rec_data_length(&p->u);
extra = rec_extra_padding(&p->u);
len = sizeof(p->u) + klen + dlen + extra;
if (off + len > zone_off + (1ULL << zhdr.zone_bits)) {
if (off + len > tdb->map_size) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"tdb_check: used overlength %llu"
" at offset %llu\n",
(long long)len, (long long)off);
return TDB_OFF_ERR;
return false;
}
if (len < sizeof(p->f)) {
......@@ -475,11 +426,11 @@ static tdb_off_t check_zone(struct tdb_context *tdb, tdb_off_t zone_off,
"tdb_check: too short record %llu at"
" %llu\n",
(long long)len, (long long)off);
return TDB_OFF_ERR;
return false;
}
}
}
return off - zone_off;
return true;
}
/* FIXME: call check() function. */
......@@ -487,10 +438,8 @@ int tdb_check(struct tdb_context *tdb,
int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
void *private_data)
{
tdb_off_t *free = NULL, *used = NULL, off;
tdb_len_t len;
tdb_off_t *free = NULL, *used = NULL;
size_t num_free = 0, num_used = 0, num_found = 0;
unsigned max_zone_bits = INITIAL_ZONE_BITS;
if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0)
return -1;
......@@ -504,26 +453,16 @@ int tdb_check(struct tdb_context *tdb,
goto fail;
/* First we do a linear scan, checking all records. */
for (off = sizeof(struct tdb_header);
off < tdb->map_size;
off += len) {
len = check_zone(tdb, off, &used, &num_used, &free, &num_free,
&max_zone_bits);
if (len == TDB_OFF_ERR)
if (!check_linear(tdb, &used, &num_used, &free, &num_free))
goto fail;
}
/* FIXME: Check key uniqueness? */
if (!check_hash(tdb, used, num_used))
goto fail;
for (off = sizeof(struct tdb_header);
off < tdb->map_size - 1;
off += len) {
len = check_free_list(tdb, off, free, num_free, &num_found);
if (len == TDB_OFF_ERR)
if (!check_free_list(tdb, tdb->flist_off, free, num_free, &num_found))
goto fail;
}
if (num_found != num_free) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"tdb_check: Not all entries are in free table\n");
......
......@@ -27,46 +27,8 @@ static unsigned fls64(uint64_t val)
return ilog64(val);
}
static unsigned ffs64(uint64_t val)
{
#if HAVE_BUILTIN_FFSLL
return __builtin_ffsll(val);
#else
unsigned r = 0;
if (!val)
return 0;
if (!(val & 0xffffffff)) {
val >>= 32;
r += 32;
}
if (!(val & 0xffff)) {
val >>= 16;
r += 16;
}
if (!(val & 0xff)) {
val >>= 8;
r += 8;
}
if (!(val & 0xf)) {
val >>= 4;
r += 4;
}
if (!(val & 0x3)) {
val >>= 2;
r += 2;
}
if (!(val & 0x1)) {
val >>= 1;
r += 1;
}
return r;
#endif
}
/* In which bucket would we find a particular record size? (ignoring header) */
unsigned int size_to_bucket(unsigned int zone_bits, tdb_len_t data_len)
unsigned int size_to_bucket(tdb_len_t data_len)
{
unsigned int bucket;
......@@ -82,72 +44,24 @@ unsigned int size_to_bucket(unsigned int zone_bits, tdb_len_t data_len)
bucket = fls64(data_len - TDB_MIN_DATA_LEN) + 2;
}
if (unlikely(bucket > BUCKETS_FOR_ZONE(zone_bits)))
bucket = BUCKETS_FOR_ZONE(zone_bits);
if (unlikely(bucket >= TDB_FREE_BUCKETS))
bucket = TDB_FREE_BUCKETS - 1;
return bucket;
}
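
For a concrete feel of the bucketing rule above, here is a rough standalone
sketch.  The small-size branch (8-byte steps up to 64 bytes above
TDB_MIN_DATA_LEN) is collapsed in this hunk and is reconstructed from the old
sizes_for_bucket() test helper further down, so treat it as an assumption;
the power-of-two branch and the TDB_FREE_BUCKETS cap follow the lines shown
here, and the constants are illustrative.

#include <stdint.h>
#include <stdio.h>

#define TDB_MIN_DATA_LEN 16	/* assumed: free-record minus used-record header */
#define TDB_FREE_BUCKETS 60	/* illustrative; really 64 - TDB_OFF_UPPER_STEAL */

/* Highest set bit, 1-based (fls64(0) == 0), like the ilog64() used above. */
static unsigned fls64(uint64_t val)
{
	unsigned r = 0;

	while (val) {
		val >>= 1;
		r++;
	}
	return r;
}

static unsigned int size_to_bucket(uint64_t data_len)
{
	unsigned int bucket;

	if (data_len - TDB_MIN_DATA_LEN <= 64)
		bucket = (data_len - TDB_MIN_DATA_LEN) / 8;	/* assumed branch */
	else
		bucket = fls64(data_len - TDB_MIN_DATA_LEN) + 2;

	if (bucket >= TDB_FREE_BUCKETS)
		bucket = TDB_FREE_BUCKETS - 1;
	return bucket;
}

int main(void)
{
	uint64_t sizes[] = { 16, 24, 80, 88, 200, 5000, 1ULL << 40 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("data_len %llu -> bucket %u\n",
		       (unsigned long long)sizes[i], size_to_bucket(sizes[i]));
	return 0;
}
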
/* Binary search for the zone for this offset. */
static tdb_off_t off_to_zone(struct tdb_context *tdb, tdb_off_t off,
struct free_zone_header *zhdr)
int tdb_flist_init(struct tdb_context *tdb)
{
tdb_off_t start, end;
start = sizeof(struct tdb_header);
end = start + (1ULL << fls64(tdb->map_size - start));
for (;;) {
if (tdb_read_convert(tdb, start, zhdr, sizeof(*zhdr)) == -1)
return TDB_OFF_ERR;
/* Is it inside this zone? */
if (off < start + (1ULL << zhdr->zone_bits))
return start;
/* In practice, start + end won't overflow. */
if (off >= (start + end) / 2)
start = (start + end) / 2;
else
end = (start + end) / 2;
}
}
static tdb_off_t last_zone(struct tdb_context *tdb,
struct free_zone_header *zhdr)
{
return off_to_zone(tdb, tdb->map_size - 1, zhdr);
}
int tdb_zone_init(struct tdb_context *tdb)
{
unsigned int i;
uint64_t randoff = 0;
/* We start in a random zone, to spread the load. */
for (i = 0; i < 64; i += fls64(RAND_MAX))
randoff ^= ((uint64_t)random()) << i;
randoff = sizeof(struct tdb_header)
+ (randoff % (tdb->map_size - sizeof(struct tdb_header)));
tdb->zone_off = off_to_zone(tdb, randoff, &tdb->zhdr);
if (tdb->zone_off == TDB_OFF_ERR)
tdb->flist_off = tdb_read_off(tdb,
offsetof(struct tdb_header, free_list));
if (tdb->flist_off == TDB_OFF_ERR)
return -1;
return 0;
}
/* Where's the header, given a zone size of 1 << zone_bits? */
static tdb_off_t zone_off(tdb_off_t off, unsigned int zone_bits)
{
off -= sizeof(struct tdb_header);
return (off & ~((1ULL << zone_bits) - 1)) + sizeof(struct tdb_header);
}
/* Offset of a given bucket. */
/* FIXME: bucket can be "unsigned" everywhere, or even uint8/16. */
tdb_off_t bucket_off(tdb_off_t zone_off, tdb_off_t bucket)
tdb_off_t bucket_off(tdb_off_t flist_off, unsigned bucket)
{
return zone_off
+ sizeof(struct free_zone_header)
return flist_off + offsetof(struct tdb_freelist, buckets)
+ bucket * sizeof(tdb_off_t);
}
......@@ -155,9 +69,8 @@ tdb_off_t bucket_off(tdb_off_t zone_off, tdb_off_t bucket)
static tdb_off_t find_free_head(struct tdb_context *tdb, tdb_off_t bucket)
{
/* Speculatively search for a non-zero bucket. */
return tdb_find_nonzero_off(tdb, bucket_off(tdb->zone_off, 0),
bucket,
BUCKETS_FOR_ZONE(tdb->zhdr.zone_bits) + 1);
return tdb_find_nonzero_off(tdb, bucket_off(tdb->flist_off, 0),
bucket, TDB_FREE_BUCKETS);
}
/* Remove from free bucket. */
......@@ -247,7 +160,6 @@ static int enqueue_in_free(struct tdb_context *tdb,
/* List need not be locked. */
int add_free_record(struct tdb_context *tdb,
unsigned int zone_bits,
tdb_off_t off, tdb_len_t len_with_header)
{
struct tdb_free_record new;
......@@ -255,13 +167,11 @@ int add_free_record(struct tdb_context *tdb,
int ret;
assert(len_with_header >= sizeof(new));
assert(zone_bits < 64);
new.magic_and_meta = TDB_FREE_MAGIC | zone_bits;
new.magic_and_meta = TDB_FREE_MAGIC;
new.data_len = len_with_header - sizeof(struct tdb_used_record);
b_off = bucket_off(zone_off(off, zone_bits),
size_to_bucket(zone_bits, new.data_len));
b_off = bucket_off(tdb->flist_off, size_to_bucket(new.data_len));
if (tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) != 0)
return -1;
......@@ -299,19 +209,14 @@ static size_t record_leftover(size_t keylen, size_t datalen,
/* Note: we unlock the current bucket if we coalesce or fail. */
static int coalesce(struct tdb_context *tdb,
tdb_off_t zone_off, unsigned zone_bits,
tdb_off_t off, tdb_off_t b_off, tdb_len_t data_len)
{
struct tdb_free_record pad, *r;
tdb_off_t zone_end, end;
tdb_off_t end;
end = off + sizeof(struct tdb_used_record) + data_len;
zone_end = zone_off + (1ULL << zone_bits);
if (tdb->methods->oob(tdb, zone_end, true))
zone_end = tdb->map_size;
while (end < zone_end) {
while (end < tdb->map_size) {
tdb_off_t nb_off;
/* FIXME: do tdb_get here and below really win? */
......@@ -322,8 +227,8 @@ static int coalesce(struct tdb_context *tdb,
if (frec_magic(r) != TDB_FREE_MAGIC)
break;
nb_off = bucket_off(zone_off,
size_to_bucket(zone_bits, r->data_len));
/* FIXME: Use flist from record */
nb_off = bucket_off(tdb->flist_off, size_to_bucket(r->data_len));
/* We may be violating lock order here, so best effort. */
if (tdb_lock_free_bucket(tdb, nb_off, TDB_LOCK_NOWAIT) == -1)
......@@ -341,8 +246,8 @@ static int coalesce(struct tdb_context *tdb,
break;
}
if (unlikely(bucket_off(zone_off,
size_to_bucket(zone_bits, r->data_len))
if (unlikely(bucket_off(tdb->flist_off,
size_to_bucket(r->data_len))
!= nb_off)) {
tdb_unlock_free_bucket(tdb, nb_off);
break;
......@@ -383,14 +288,14 @@ static int coalesce(struct tdb_context *tdb,
/* We have to drop this to avoid deadlocks, so make sure record
* doesn't get coalesced by someone else! */
r->magic_and_meta = TDB_COALESCING_MAGIC | zone_bits;
r->magic_and_meta = TDB_COALESCING_MAGIC;
r->data_len = end - off - sizeof(struct tdb_used_record);
if (tdb_access_commit(tdb, r) != 0)
goto err;
tdb_unlock_free_bucket(tdb, b_off);
if (add_free_record(tdb, zone_bits, off, end - off) == -1)
if (add_free_record(tdb, off, end - off) == -1)
return -1;
return 1;
......@@ -402,8 +307,7 @@ err:
/* We need size bytes to put our key and data in. */
static tdb_off_t lock_and_alloc(struct tdb_context *tdb,
tdb_off_t zone_off,
unsigned zone_bits,
tdb_off_t flist_off,
tdb_off_t bucket,
size_t keylen, size_t datalen,
bool want_extra,
......@@ -415,11 +319,9 @@ static tdb_off_t lock_and_alloc(struct tdb_context *tdb,
size_t size = adjust_size(keylen, datalen);
again:
b_off = bucket_off(zone_off, bucket);
b_off = bucket_off(flist_off, bucket);
/* FIXME: Try non-blocking wait first, to measure contention.
* If we're contented, try switching zones, and don't enlarge zone
* next time (we want more zones). */
/* FIXME: Try non-blocking wait first, to measure contention. */
/* Lock this bucket. */
if (tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == -1) {
return TDB_OFF_ERR;
......@@ -464,8 +366,7 @@ again:
multiplier *= 1.01;
/* Since we're going slow anyway, try coalescing here. */
switch (coalesce(tdb, zone_off, zone_bits, off, b_off,
r->data_len)) {
switch (coalesce(tdb, off, b_off, r->data_len)) {
case -1:
/* This has already unlocked on error. */
return -1;
......@@ -493,7 +394,7 @@ again:
* coalesce() could try to merge it! */
if (set_header(tdb, &rec, keylen, datalen,
best.data_len - leftover,
hashlow, zone_bits) != 0)
hashlow) != 0)
goto unlock_err;
if (tdb_write_convert(tdb, best_off, &rec, sizeof(rec)) != 0)
......@@ -502,7 +403,7 @@ again:
tdb_unlock_free_bucket(tdb, b_off);
if (leftover) {
if (add_free_record(tdb, zone_bits,
if (add_free_record(tdb,
best_off + sizeof(rec)
+ best.data_len - leftover,
leftover))
......@@ -519,50 +420,27 @@ unlock_err:
return TDB_OFF_ERR;
}
static bool next_zone(struct tdb_context *tdb)
{
tdb_off_t next = tdb->zone_off + (1ULL << tdb->zhdr.zone_bits);
/* We must have a header. */
if (tdb->methods->oob(tdb, next + sizeof(tdb->zhdr), true))
return false;
tdb->zone_off = next;
return tdb_read_convert(tdb, next, &tdb->zhdr, sizeof(tdb->zhdr)) == 0;
}
/* Offset returned is within current zone (which it may alter). */
/* Get a free block from current free list, or 0 if none. */
static tdb_off_t get_free(struct tdb_context *tdb,
size_t keylen, size_t datalen, bool want_extra,
unsigned hashlow)
{
tdb_off_t start_zone = tdb->zone_off, off;
bool wrapped = false;
size_t size = adjust_size(keylen, datalen);
tdb_off_t off;
unsigned start_b, b;
/* If they are growing, add 50% to get to higher bucket. */
if (want_extra)
size += datalen / 2;
/* FIXME: If we don't get a hit in the first bucket we want,
* try changing zones for next time. That should help wear
* zones evenly, so we don't need to search all of them before
* expanding. */
while (!wrapped || tdb->zone_off != start_zone) {
tdb_off_t b;
/* Shortcut for really huge allocations... */
if ((size >> tdb->zhdr.zone_bits) != 0)
goto next;
start_b = size_to_bucket(adjust_size(keylen,
datalen + datalen / 2));
else
start_b = size_to_bucket(adjust_size(keylen, datalen));
/* Start at exact size bucket, and search up... */
b = size_to_bucket(tdb->zhdr.zone_bits, size);
for (b = find_free_head(tdb, b);
b <= BUCKETS_FOR_ZONE(tdb->zhdr.zone_bits);
for (b = find_free_head(tdb, start_b);
b < TDB_FREE_BUCKETS;
b = find_free_head(tdb, b + 1)) {
/* Try getting one from list. */
off = lock_and_alloc(tdb, tdb->zone_off,
tdb->zhdr.zone_bits,
off = lock_and_alloc(tdb, tdb->flist_off,
b, keylen, datalen, want_extra,
hashlow);
if (off == TDB_OFF_ERR)
......@@ -571,33 +449,18 @@ static tdb_off_t get_free(struct tdb_context *tdb,
return off;
/* Didn't work. Try next bucket. */
}
next:
/* Didn't work, try next zone, if it exists. */
if (!next_zone(tdb)) {
wrapped = true;
tdb->zone_off = sizeof(struct tdb_header);
if (tdb_read_convert(tdb, tdb->zone_off,
&tdb->zhdr, sizeof(tdb->zhdr))) {
return TDB_OFF_ERR;
}
}
}
return 0;
}
int set_header(struct tdb_context *tdb,
struct tdb_used_record *rec,
uint64_t keylen, uint64_t datalen,
uint64_t actuallen, unsigned hashlow,
unsigned int zone_bits)
uint64_t actuallen, unsigned hashlow)
{
uint64_t keybits = (fls64(keylen) + 1) / 2;
/* Use bottom bits of hash, so it's independent of hash table size. */
rec->magic_and_meta
= zone_bits
| ((hashlow & ((1 << 5)-1)) << 6)
rec->magic_and_meta = (hashlow & ((1 << 11)-1))
| ((actuallen - (keylen + datalen)) << 11)
| (keybits << 43)
| (TDB_MAGIC << 48);
......@@ -617,26 +480,11 @@ int set_header(struct tdb_context *tdb,
return 0;
}
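
To make the new bit layout concrete: with the zone bits gone, the low 11 bits
of magic_and_meta carry the hash, the padding sits at bit 11, the key-length
bits at 43 and the magic at 48.  Below is a small pack/unpack round trip that
mirrors the packing above and the rec_hash()/rec_extra_padding() accessors
later in this diff; the TDB_MAGIC value is a placeholder assumption.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TDB_MAGIC 0x1999ULL	/* placeholder value; only its 16-bit width matters */

static unsigned fls64(uint64_t val)
{
	unsigned r = 0;

	while (val) {
		val >>= 1;
		r++;
	}
	return r;
}

/* Pack as the new set_header() does. */
static uint64_t pack_meta(uint64_t keylen, uint64_t datalen,
			  uint64_t actuallen, unsigned hashlow)
{
	uint64_t keybits = (fls64(keylen) + 1) / 2;

	return (hashlow & ((1 << 11) - 1))
		| ((actuallen - (keylen + datalen)) << 11)
		| (keybits << 43)
		| (TDB_MAGIC << 48);
}

/* Mirror rec_hash(), rec_extra_padding() and rec_magic() in the new layout. */
static uint32_t meta_hash(uint64_t m)    { return m & ((1 << 11) - 1); }
static uint64_t meta_padding(uint64_t m) { return (m >> 11) & 0xFFFFFFFF; }
static uint16_t meta_magic(uint64_t m)   { return (uint16_t)(m >> 48); }

int main(void)
{
	uint64_t m = pack_meta(5, 100, 120, 0x5a3);

	assert(meta_hash(m) == 0x5a3);
	assert(meta_padding(m) == 15);	/* 120 - (5 + 100) */
	assert(meta_magic(m) == TDB_MAGIC);
	printf("magic_and_meta = 0x%016llx\n", (unsigned long long)m);
	return 0;
}
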
static bool zones_contended(struct tdb_context *tdb)
{
return false;
}
/* Assume we want buckets up to the comfort factor. */
static tdb_len_t overhead(unsigned int zone_bits)
{
return sizeof(struct free_zone_header)
+ (BUCKETS_FOR_ZONE(zone_bits) + 1) * sizeof(tdb_off_t);
}
/* Expand the database (by adding a zone). */
/* Expand the database. */
static int tdb_expand(struct tdb_context *tdb, tdb_len_t size)
{
uint64_t old_size;
tdb_off_t off;
unsigned int num_buckets, zone_bits;
tdb_len_t wanted, expand;
struct free_zone_header zhdr;
tdb_len_t wanted;
/* We need room for the record header too. */
wanted = sizeof(struct tdb_used_record) + size;
......@@ -648,91 +496,20 @@ static int tdb_expand(struct tdb_context *tdb, tdb_len_t size)
/* Someone else may have expanded the file, so retry. */
old_size = tdb->map_size;
tdb->methods->oob(tdb, tdb->map_size + 1, true);
if (tdb->map_size != old_size)
goto success;
/* Treat last zone as minimum reasonable zone size. */
off = last_zone(tdb, &zhdr);
if (off == TDB_OFF_ERR)
goto fail;
/* Zone isn't fully expanded? */
if (tdb->map_size < off + (1ULL << zhdr.zone_bits)) {
expand = off + (1ULL << zhdr.zone_bits) - tdb->map_size;
/* Expand more than we want. */
if (expand > (wanted << TDB_COMFORT_FACTOR_BITS))
expand = (wanted << TDB_COMFORT_FACTOR_BITS);
if (tdb->methods->expand_file(tdb, expand) == -1)
goto fail;
/* We need to drop this lock before adding free record. */
if (tdb->map_size != old_size) {
tdb_unlock_expand(tdb, F_WRLCK);
/* Allocate from here. */
tdb->zone_off = off;
tdb->zhdr = zhdr;
/* FIXME: If this isn't sufficient, we search again... */
return add_free_record(tdb, zhdr.zone_bits,
tdb->map_size - expand, expand);
return 0;
}
/* We are never allowed to cross a power-of-two boundary, and our
* minimum zone size is 1 << INITIAL_ZONE_BITS.
*
* If our filesize is 128k, we can add a 64k or a 128k zone. If it's
* 192k, we can only add a 64k zone.
*
* In other words, our max zone size is (1 << (ffs(filesize) - 1)) */
zone_bits = ffs64(old_size - sizeof(struct tdb_header)) - 1;
assert(zone_bits >= INITIAL_ZONE_BITS);
/* Big zones generally good, but more zones wanted if contended. */
if (zones_contended(tdb)) {
/* If it suffices, make zone same size as last one. */
if (zhdr.zone_bits < zone_bits
&& (1ULL << zhdr.zone_bits) >= overhead(zone_bits)+wanted)
zone_bits = zhdr.zone_bits;
if (tdb->methods->expand_file(tdb, wanted*TDB_EXTENSION_FACTOR) == -1) {
tdb_unlock_expand(tdb, F_WRLCK);
return -1;
}
zhdr.zone_bits = zone_bits;
num_buckets = BUCKETS_FOR_ZONE(zone_bits);
/* Expand the file by more than we need right now. */
expand = 1ULL << zone_bits;
if (expand > overhead(zone_bits) + (wanted << TDB_COMFORT_FACTOR_BITS))
expand = overhead(zone_bits)
+ (wanted << TDB_COMFORT_FACTOR_BITS);
if (tdb->methods->expand_file(tdb, expand) == -1)
goto fail;
/* Write new zone header (at old end). */
off = old_size;
if (tdb_write_convert(tdb, off, &zhdr, sizeof(zhdr)) == -1)
goto fail;
/* Now write empty buckets. */
off += sizeof(zhdr);
if (zero_out(tdb, off, (num_buckets+1) * sizeof(tdb_off_t)) == -1)
goto fail;
off += (num_buckets+1) * sizeof(tdb_off_t);
/* Now add the rest as our free record. */
if (add_free_record(tdb, zone_bits, off, expand - overhead(zone_bits))
== -1)
goto fail;
/* Try allocating from this zone now. */
tdb->zone_off = old_size;
tdb->zhdr = zhdr;
success:
/* We need to drop this lock before adding free record. */
tdb_unlock_expand(tdb, F_WRLCK);
return 0;
fail:
tdb_unlock_expand(tdb, F_WRLCK);
return -1;
return add_free_record(tdb, old_size, wanted * TDB_EXTENSION_FACTOR);
}
/* This won't fail: it will expand the database if it has to. */
......
......@@ -257,7 +257,7 @@ static int tdb_nest_lock(struct tdb_context *tdb, tdb_off_t offset, int ltype,
{
struct tdb_lock_type *new_lck;
if (offset >= TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE + tdb->map_size / 8) {
if (offset > TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE + tdb->map_size / 8) {
tdb->ecode = TDB_ERR_LOCK;
tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
"tdb_nest_lock: invalid offset %llu ltype=%d\n",
......
......@@ -92,10 +92,8 @@ typedef uint64_t tdb_off_t;
/* And 8 entries in each group, ie 8 groups per sublevel. */
#define TDB_HASH_GROUP_BITS 3
/* We start with a 64k-sized zone. */
#define INITIAL_ZONE_BITS 16
/* Try to create zones at least 32 times larger than allocations. */
#define TDB_COMFORT_FACTOR_BITS 5
/* Extend file by at least 32 times what we need. */
#define TDB_EXTENSION_FACTOR 32
/* We steal bits from the offsets to store hash info. */
#define TDB_OFF_HASH_GROUP_MASK ((1ULL << TDB_HASH_GROUP_BITS) - 1)
......@@ -111,14 +109,13 @@ typedef uint64_t tdb_off_t;
#define TDB_OFF_MASK \
(((1ULL << (64 - TDB_OFF_UPPER_STEAL)) - 1) - TDB_OFF_HASH_GROUP_MASK)
/* How many buckets in a free list: see size_to_bucket(). */
#define TDB_FREE_BUCKETS (64 - TDB_OFF_UPPER_STEAL)
/* We have to be able to fit a free record here. */
#define TDB_MIN_DATA_LEN \
(sizeof(struct tdb_free_record) - sizeof(struct tdb_used_record))
/* We ensure buckets up to size 1 << (zone_bits - TDB_COMFORT_FACTOR_BITS). */
/* FIXME: test this matches size_to_bucket! */
#define BUCKETS_FOR_ZONE(zone_bits) ((zone_bits) + 2 - TDB_COMFORT_FACTOR_BITS)
#if !HAVE_BSWAP_64
static inline uint64_t bswap_64(uint64_t x)
{
......@@ -138,8 +135,7 @@ struct tdb_used_record {
magic: 16, (highest)
key_len_bits: 5,
extra_padding: 32
hash_bits: 5,
zone_bits: 6 (lowest)
hash_bits: 11
*/
uint64_t magic_and_meta;
/* The bottom key_len_bits*2 are key length, rest is data length. */
......@@ -166,14 +162,9 @@ static inline uint64_t rec_extra_padding(const struct tdb_used_record *r)
return (r->magic_and_meta >> 11) & 0xFFFFFFFF;
}
static inline unsigned int rec_zone_bits(const struct tdb_used_record *r)
{
return r->magic_and_meta & ((1 << 6) - 1);
}
static inline uint32_t rec_hash(const struct tdb_used_record *r)
{
return (r->magic_and_meta >> 6) & ((1 << 5) - 1);
return r->magic_and_meta & ((1 << 11) - 1);
}
static inline uint16_t rec_magic(const struct tdb_used_record *r)
......@@ -182,32 +173,17 @@ static inline uint16_t rec_magic(const struct tdb_used_record *r)
}
struct tdb_free_record {
uint64_t magic_and_meta; /* Bottom 6 bits are zone bits. */
uint64_t magic_and_meta;
uint64_t data_len; /* Not counting these two fields. */
/* This is why the minimum record size is 16 bytes. */
uint64_t next, prev;
};
static inline unsigned int frec_zone_bits(const struct tdb_free_record *f)
{
return f->magic_and_meta & ((1 << 6) - 1);
}
static inline uint64_t frec_magic(const struct tdb_free_record *f)
{
return f->magic_and_meta & ~((1ULL << 6) - 1);
return f->magic_and_meta;
}
/* Each zone has its set of free lists at the head.
*
* For each zone we have a series of per-size buckets, and a final bucket for
* "too big". */
struct free_zone_header {
/* How much does this zone cover? */
uint64_t zone_bits;
/* tdb_off_t buckets[free_buckets + 1] */
};
/* this is stored at the front of every database */
struct tdb_header {
char magic_food[64]; /* for /etc/magic */
......@@ -215,13 +191,19 @@ struct tdb_header {
uint64_t version; /* version of the code */
uint64_t hash_test; /* result of hashing HASH_MAGIC. */
uint64_t hash_seed; /* "random" seed written at creation time. */
tdb_off_t free_list; /* (First) free list. */
tdb_off_t reserved[28];
tdb_off_t reserved[27];
/* Top level hash table. */
tdb_off_t hashtable[1ULL << TDB_TOPLEVEL_HASH_BITS];
};
struct tdb_freelist {
struct tdb_used_record hdr;
tdb_off_t buckets[TDB_FREE_BUCKETS];
};
/* Information about a particular (locked) hash entry. */
struct hash_info {
/* Full hash value of entry. */
......@@ -308,10 +290,8 @@ struct tdb_context {
/* Set if we are in a transaction. */
struct tdb_transaction *transaction;
/* What zone of the tdb to use, for spreading load. */
uint64_t zone_off;
/* Cached copy of zone header. */
struct free_zone_header zhdr;
/* What freelist are we using? */
uint64_t flist_off;
/* IO methods: changes for transactions. */
const struct tdb_methods *methods;
......@@ -368,26 +348,25 @@ int delete_from_hash(struct tdb_context *tdb, struct hash_info *h);
bool is_subhash(tdb_off_t val);
/* free.c: */
int tdb_zone_init(struct tdb_context *tdb);
int tdb_flist_init(struct tdb_context *tdb);
/* If this fails, try tdb_expand. */
tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen,
uint64_t hash, bool growing);
/* Put this record in a free list. */
int add_free_record(struct tdb_context *tdb, unsigned int zone_bits,
int add_free_record(struct tdb_context *tdb,
tdb_off_t off, tdb_len_t len_with_header);
/* Set up header for a used record. */
int set_header(struct tdb_context *tdb,
struct tdb_used_record *rec,
uint64_t keylen, uint64_t datalen,
uint64_t actuallen, unsigned hashlow,
unsigned int zone_bits);
uint64_t actuallen, unsigned hashlow);
/* Used by tdb_check to verify. */
unsigned int size_to_bucket(unsigned int free_buckets, tdb_len_t data_len);
tdb_off_t bucket_off(tdb_off_t zone_off, tdb_off_t bucket);
unsigned int size_to_bucket(tdb_len_t data_len);
tdb_off_t bucket_off(tdb_off_t flist_off, unsigned bucket);
/* io.c: */
/* Initialize tdb->methods. */
......
......@@ -19,25 +19,6 @@
#include <assert.h>
#include <ccan/tally/tally.h>
static void sizes_for_bucket(unsigned bucket, size_t *min, size_t *max)
{
if (bucket <= 8) {
*min = *max = TDB_MIN_DATA_LEN + bucket * 8;
} else if (bucket == 9) {
/* FIXME: This is twisted; fix size_to_bucket. */
*min = TDB_MIN_DATA_LEN + (1ULL << (bucket - 3)) + 8;
*max = TDB_MIN_DATA_LEN + (1ULL << (bucket - 2)) - 8;
} else {
*min = TDB_MIN_DATA_LEN + (1ULL << (bucket - 3));
*max = TDB_MIN_DATA_LEN + (1ULL << (bucket - 2)) - 8;
}
assert(size_to_bucket(63, *min) == bucket);
assert(size_to_bucket(63, *max) == bucket);
if (bucket > 8)
assert(size_to_bucket(63, *min - 8) == bucket - 1);
assert(size_to_bucket(63, *max + 8) == bucket + 1);
}
static int count_hash(struct tdb_context *tdb,
tdb_off_t hash_off, unsigned bits)
{
......@@ -54,48 +35,32 @@ static int count_hash(struct tdb_context *tdb,
return count;
}
static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off,
struct tally *zones,
static bool summarize(struct tdb_context *tdb,
struct tally *hashes,
struct tally *flists,
struct tally *free,
struct tally *keys,
struct tally *data,
struct tally *extra,
struct tally *uncoal,
uint64_t bucketlen[],
unsigned int *num_buckets)
struct tally *buckets)
{
struct free_zone_header zhdr;
tdb_off_t off, end;
tdb_off_t off;
tdb_len_t len;
unsigned int hdrlen;
tdb_len_t unc = 0;
if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1)
return TDB_OFF_ERR;
tally_add(zones, 1ULL << zhdr.zone_bits);
*num_buckets = BUCKETS_FOR_ZONE(zhdr.zone_bits);
hdrlen = sizeof(zhdr)
+ (BUCKETS_FOR_ZONE(zhdr.zone_bits) + 1) * sizeof(tdb_off_t);
end = zone_off + (1ULL << zhdr.zone_bits);
if (end > tdb->map_size)
end = tdb->map_size;
for (off = zone_off + hdrlen; off < end; off += len) {
for (off = sizeof(struct tdb_header); off < tdb->map_size; off += len) {
union {
struct tdb_used_record u;
struct tdb_free_record f;
} pad, *p;
p = tdb_get(tdb, off, &pad, sizeof(pad));
if (!p)
return TDB_OFF_ERR;
return false;
if (rec_magic(&p->u) != TDB_MAGIC) {
len = p->f.data_len;
tally_add(free, len);
bucketlen[size_to_bucket(frec_zone_bits(&p->f), len)]++;
tally_add(buckets, size_to_bucket(len));
len += sizeof(p->u);
unc++;
} else {
......@@ -108,13 +73,19 @@ static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off,
+ rec_data_length(&p->u)
+ rec_extra_padding(&p->u);
/* FIXME: Use different magic for hashes? */
if (!rec_key_length(&p->u) && !rec_hash(&p->u)) {
int count = count_hash(tdb, off + sizeof(p->u),
/* FIXME: Use different magic for hashes, flists. */
if (!rec_key_length(&p->u) && rec_hash(&p->u) < 2) {
if (rec_hash(&p->u) == 0) {
int count = count_hash(tdb,
off + sizeof(p->u),
TDB_SUBLEVEL_HASH_BITS);
if (count == -1)
return TDB_OFF_ERR;
return false;
tally_add(hashes, count);
} else {
tally_add(flists,
rec_data_length(&p->u));
}
} else {
tally_add(keys, rec_key_length(&p->u));
tally_add(data, rec_data_length(&p->u));
......@@ -124,13 +95,11 @@ static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off,
}
if (unc)
tally_add(uncoal, unc);
return 1ULL << zhdr.zone_bits;
return true;
}
#define SUMMARY_FORMAT \
"Size of file/data: %zu/%zu\n" \
"Number of zones: %zu\n" \
"Smallest/average/largest zone size: %zu/%zu/%zu\n%s" \
"Number of records: %zu\n" \
"Smallest/average/largest keys: %zu/%zu/%zu\n%s" \
"Smallest/average/largest data: %zu/%zu/%zu\n%s" \
......@@ -139,10 +108,11 @@ static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off,
"Smallest/average/largest free records: %zu/%zu/%zu\n%s" \
"Number of uncoalesced records: %zu\n" \
"Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \
"Number of free lists: %zu\n%s" \
"Toplevel hash used: %u of %u\n" \
"Number of subhashes: %zu\n" \
"Smallest/average/largest subhash entries: %zu/%zu/%zu\n%s" \
"Percentage keys/data/padding/free/rechdrs/zonehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n"
"Percentage keys/data/padding/free/rechdrs/freehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n"
#define BUCKET_SUMMARY_FORMAT_A \
"Free bucket %zu: total entries %zu.\n" \
......@@ -156,17 +126,13 @@ static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off,
char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags)
{
tdb_off_t off;
tdb_len_t len;
unsigned int i, num_buckets, max_bucket = 0;
uint64_t total_buckets = 0;
struct tally *zones, *hashes, *freet, *keys, *data, *extra, *uncoal,
*buckets[BUCKETS_FOR_ZONE(63)+1] = { NULL };
char *zonesg, *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg,
*bucketsg[BUCKETS_FOR_ZONE(63)+1] = { NULL };
struct tally *flists, *hashes, *freet, *keys, *data, *extra, *uncoal,
*buckets;
char *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg, *bucketsg;
char *ret = NULL;
zonesg = hashesg = freeg = keysg = datag = extrag = uncoalg = NULL;
hashesg = freeg = keysg = datag = extrag = uncoalg = bucketsg = NULL;
if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0)
return NULL;
......@@ -177,71 +143,43 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags)
}
/* Start stats off empty. */
zones = tally_new(HISTO_HEIGHT);
flists = tally_new(HISTO_HEIGHT);
hashes = tally_new(HISTO_HEIGHT);
freet = tally_new(HISTO_HEIGHT);
keys = tally_new(HISTO_HEIGHT);
data = tally_new(HISTO_HEIGHT);
extra = tally_new(HISTO_HEIGHT);
uncoal = tally_new(HISTO_HEIGHT);
if (!zones || !hashes || !freet || !keys || !data || !extra
|| !uncoal) {
buckets = tally_new(HISTO_HEIGHT);
if (!flists || !hashes || !freet || !keys || !data || !extra
|| !uncoal || !buckets) {
tdb->ecode = TDB_ERR_OOM;
goto unlock;
}
for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
buckets[i] = tally_new(HISTO_HEIGHT);
if (!buckets[i]) {
tdb->ecode = TDB_ERR_OOM;
if (!summarize(tdb, hashes, flists, freet, keys, data, extra, uncoal,
buckets))
goto unlock;
}
}
for (off = sizeof(struct tdb_header);
off < tdb->map_size - 1;
off += len) {
uint64_t bucketlen[BUCKETS_FOR_ZONE(63)+1] = { 0 };
len = summarize_zone(tdb, off, zones, hashes, freet, keys,
data, extra, uncoal, bucketlen,
&num_buckets);
if (len == TDB_OFF_ERR)
goto unlock;
for (i = 0; i < num_buckets; i++)
tally_add(buckets[i], bucketlen[i]);
if (num_buckets > max_bucket)
max_bucket = num_buckets;
total_buckets += num_buckets;
}
if (flags & TDB_SUMMARY_HISTOGRAMS) {
zonesg = tally_histogram(zones, HISTO_WIDTH, HISTO_HEIGHT);
hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT);
freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT);
keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT);
datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT);
extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT);
uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT);
for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
bucketsg[i] = tally_histogram(buckets[i],
HISTO_WIDTH,
HISTO_HEIGHT);
}
bucketsg = tally_histogram(buckets, HISTO_WIDTH, HISTO_HEIGHT);
}
/* 20 is max length of a %llu. */
len = strlen(SUMMARY_FORMAT) + 33*20 + 1
+ (zonesg ? strlen(zonesg) : 0)
+ (hashesg ? strlen(hashesg) : 0)
+ (freeg ? strlen(freeg) : 0)
+ (keysg ? strlen(keysg) : 0)
+ (datag ? strlen(datag) : 0)
+ (extrag ? strlen(extrag) : 0)
+ (uncoalg ? strlen(uncoalg) : 0);
for (i = 0; i < max_bucket; i++) {
len += strlen(BUCKET_SUMMARY_FORMAT_B) + 6 * 20
+ (bucketsg[i] ? strlen(bucketsg[i]) : 0);
}
+ (uncoalg ? strlen(uncoalg) : 0)
+ (bucketsg ? strlen(bucketsg) : 0);
ret = malloc(len);
if (!ret)
......@@ -250,9 +188,6 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags)
len = sprintf(ret, SUMMARY_FORMAT,
(size_t)tdb->map_size,
tally_num(keys) + tally_num(data),
tally_num(zones),
tally_min(zones), tally_mean(zones), tally_max(zones),
zonesg ? zonesg : "",
tally_num(keys),
tally_min(keys), tally_mean(keys), tally_max(keys),
keysg ? keysg : "",
......@@ -266,6 +201,8 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags)
tally_total(uncoal, NULL),
tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal),
uncoalg ? uncoalg : "",
tally_num(buckets),
bucketsg ? bucketsg : "",
count_hash(tdb, offsetof(struct tdb_header, hashtable),
TDB_TOPLEVEL_HASH_BITS),
1 << TDB_TOPLEVEL_HASH_BITS,
......@@ -278,53 +215,28 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags)
tally_total(freet, NULL) * 100.0 / tdb->map_size,
(tally_num(keys) + tally_num(freet) + tally_num(hashes))
* sizeof(struct tdb_used_record) * 100.0 / tdb->map_size,
(tally_num(zones) * sizeof(struct free_zone_header)
+ total_buckets * sizeof(tdb_off_t))
tally_num(flists) * sizeof(struct tdb_freelist)
* 100.0 / tdb->map_size,
(tally_num(hashes)
* (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
+ (sizeof(tdb_off_t) << TDB_TOPLEVEL_HASH_BITS))
* 100.0 / tdb->map_size);
for (i = 0; i < max_bucket; i++) {
size_t min, max;
sizes_for_bucket(i, &min, &max);
if (min == max) {
len += sprintf(ret + len, BUCKET_SUMMARY_FORMAT_A,
min, tally_total(buckets[i], NULL),
tally_min(buckets[i]),
tally_mean(buckets[i]),
tally_max(buckets[i]),
bucketsg[i] ? bucketsg[i] : "");
} else {
len += sprintf(ret + len, BUCKET_SUMMARY_FORMAT_B,
min, max, tally_total(buckets[i], NULL),
tally_min(buckets[i]),
tally_mean(buckets[i]),
tally_max(buckets[i]),
bucketsg[i] ? bucketsg[i] : "");
}
}
unlock:
free(zonesg);
free(hashesg);
free(freeg);
free(keysg);
free(datag);
free(extrag);
free(uncoalg);
free(zones);
free(bucketsg);
free(hashes);
free(buckets);
free(freet);
free(keys);
free(data);
free(extra);
free(uncoal);
for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
free(buckets[i]);
free(bucketsg[i]);
}
tdb_allrecord_unlock(tdb, F_RDLCK);
tdb_unlock_expand(tdb, F_RDLCK);
......
......@@ -80,9 +80,7 @@ static uint64_t random_number(struct tdb_context *tdb)
struct new_database {
struct tdb_header hdr;
/* Initial free zone. */
struct free_zone_header zhdr;
tdb_off_t free[BUCKETS_FOR_ZONE(INITIAL_ZONE_BITS) + 1];
struct tdb_freelist flist;
};
/* initialise a new database */
......@@ -110,8 +108,10 @@ static int tdb_new_database(struct tdb_context *tdb,
memset(newdb.hdr.hashtable, 0, sizeof(newdb.hdr.hashtable));
/* Free is empty. */
newdb.zhdr.zone_bits = INITIAL_ZONE_BITS;
memset(newdb.free, 0, sizeof(newdb.free));
newdb.hdr.free_list = offsetof(struct new_database, flist);
memset(&newdb.flist, 0, sizeof(newdb.flist));
set_header(NULL, &newdb.flist.hdr, 0,
sizeof(newdb.flist.buckets), sizeof(newdb.flist.buckets), 1);
/* Magic food */
memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food));
......@@ -176,7 +176,6 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
tdb->log_priv = NULL;
tdb->transaction = NULL;
tdb_hash_init(tdb);
/* last_zone will be set below. */
tdb_io_init(tdb);
tdb_lock_init(tdb);
......@@ -230,7 +229,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
}
tdb_convert(tdb, &hdr.hash_seed, sizeof(hdr.hash_seed));
tdb->hash_seed = hdr.hash_seed;
tdb_zone_init(tdb);
tdb_flist_init(tdb);
return tdb;
}
......@@ -314,8 +313,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
/* This make sure we have current map_size and mmap. */
tdb->methods->oob(tdb, tdb->map_size + 1, true);
/* Now we can pick a random free zone to start from. */
if (tdb_zone_init(tdb) == -1)
if (tdb_flist_init(tdb) == -1)
goto fail;
tdb->next = tdbs;
......@@ -358,8 +356,7 @@ static int update_rec_hdr(struct tdb_context *tdb,
{
uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec);
if (set_header(tdb, rec, keylen, datalen, keylen + dataroom, h,
rec_zone_bits(rec)))
if (set_header(tdb, rec, keylen, datalen, keylen + dataroom, h))
return -1;
return tdb_write_convert(tdb, off, rec, sizeof(*rec));
......@@ -370,7 +367,6 @@ static int replace_data(struct tdb_context *tdb,
struct hash_info *h,
struct tdb_data key, struct tdb_data dbuf,
tdb_off_t old_off, tdb_len_t old_room,
unsigned old_zone,
bool growing)
{
tdb_off_t new_off;
......@@ -382,7 +378,7 @@ static int replace_data(struct tdb_context *tdb,
/* We didn't like the existing one: remove it. */
if (old_off) {
add_free_record(tdb, old_zone, old_off,
add_free_record(tdb, old_off,
sizeof(struct tdb_used_record)
+ key.dsize + old_room);
if (replace_in_hash(tdb, h, new_off) == -1)
......@@ -454,8 +450,7 @@ int tdb_store(struct tdb_context *tdb,
}
/* If we didn't use the old record, this implies we're growing. */
ret = replace_data(tdb, &h, key, dbuf, off, old_room,
rec_zone_bits(&rec), off != 0);
ret = replace_data(tdb, &h, key, dbuf, off, old_room, off != 0);
tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
return ret;
......@@ -524,8 +519,7 @@ int tdb_append(struct tdb_context *tdb,
}
/* If they're using tdb_append(), it implies they're growing record. */
ret = replace_data(tdb, &h, key, new_dbuf, off,
old_room, rec_zone_bits(&rec), true);
ret = replace_data(tdb, &h, key, new_dbuf, off, old_room, true);
tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
free(newdata);
......@@ -580,7 +574,7 @@ int tdb_delete(struct tdb_context *tdb, struct tdb_data key)
goto unlock_err;
/* Free the deleted entry. */
if (add_free_record(tdb, rec_zone_bits(&rec), off,
if (add_free_record(tdb, off,
sizeof(struct tdb_used_record)
+ rec_key_length(&rec)
+ rec_data_length(&rec)
......
......@@ -23,15 +23,10 @@ static void add(struct tdb_layout *layout, union tdb_layout_elem elem)
layout->elem[layout->num_elems++] = elem;
}
void tdb_layout_add_zone(struct tdb_layout *layout,
unsigned int zone_bits,
bool fill_prev)
void tdb_layout_add_freelist(struct tdb_layout *layout)
{
union tdb_layout_elem elem;
if (fill_prev)
tdb_layout_add_free(layout, 0);
elem.base.type = ZONE;
elem.zone.zone_bits = zone_bits;
elem.base.type = FREELIST;
add(layout, elem);
}
......@@ -85,10 +80,9 @@ static tdb_len_t hashtable_len(struct tle_hashtable *htable)
+ htable->extra;
}
static tdb_len_t zone_header_len(struct tle_zone *zone)
static tdb_len_t freelist_len(struct tle_freelist *flist)
{
return sizeof(struct free_zone_header)
+ sizeof(tdb_off_t) * (BUCKETS_FOR_ZONE(zone->zone_bits)+1);
return sizeof(struct tdb_freelist);
}
static void set_free_record(void *mem, tdb_len_t len)
......@@ -97,47 +91,43 @@ static void set_free_record(void *mem, tdb_len_t len)
}
static void set_data_record(void *mem, struct tdb_context *tdb,
struct tle_zone *last_zone,
struct tle_used *used)
{
struct tdb_used_record *u = mem;
set_header(tdb, u, used->key.dsize, used->data.dsize,
used->key.dsize + used->data.dsize + used->extra,
tdb_hash(tdb, used->key.dptr, used->key.dsize),
last_zone->zone_bits);
tdb_hash(tdb, used->key.dptr, used->key.dsize));
memcpy(u + 1, used->key.dptr, used->key.dsize);
memcpy((char *)(u + 1) + used->key.dsize,
used->data.dptr, used->data.dsize);
}
static void set_hashtable(void *mem, struct tdb_context *tdb,
struct tle_zone *last_zone,
struct tle_hashtable *htable)
{
struct tdb_used_record *u = mem;
tdb_len_t len = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS;
set_header(tdb, u, 0, len, len + htable->extra, 0,
last_zone->zone_bits);
set_header(tdb, u, 0, len, len + htable->extra, 0);
memset(u + 1, 0, len);
}
static void set_zone(void *mem, struct tdb_context *tdb,
struct tle_zone *zone)
static void set_freelist(void *mem, struct tdb_context *tdb,
struct tle_freelist *freelist)
{
struct free_zone_header *fz = mem;
memset(fz, 0, zone_header_len(zone));
fz->zone_bits = zone->zone_bits;
struct tdb_freelist *flist = mem;
memset(flist, 0, sizeof(*flist));
set_header(tdb, &flist->hdr, 0,
sizeof(*flist) - sizeof(flist->hdr),
sizeof(*flist) - sizeof(flist->hdr), 1);
}
static void add_to_freetable(struct tdb_context *tdb,
struct tle_zone *last_zone,
tdb_off_t eoff,
tdb_off_t elen)
{
add_free_record(tdb, last_zone->zone_bits, eoff,
sizeof(struct tdb_used_record) + elen);
add_free_record(tdb, eoff, sizeof(struct tdb_used_record) + elen);
}
static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup)
......@@ -204,15 +194,10 @@ static void add_to_hashtable(struct tdb_context *tdb,
struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
{
unsigned int i;
tdb_off_t off, len;
tdb_len_t zone_left;
tdb_off_t off, len, flist_off = 0;
char *mem;
struct tdb_context *tdb;
struct tle_zone *last_zone = NULL;
assert(layout->elem[0].base.type == ZONE);
zone_left = 0;
off = sizeof(struct tdb_header);
/* First pass of layout: calc lengths */
......@@ -220,15 +205,12 @@ struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
union tdb_layout_elem *e = &layout->elem[i];
e->base.off = off;
switch (e->base.type) {
case ZONE:
assert(zone_left == 0);
len = zone_header_len(&e->zone);
zone_left = 1ULL << e->zone.zone_bits;
case FREELIST:
assert(flist_off == 0);
flist_off = off;
len = freelist_len(&e->flist);
break;
case FREE:
if (e->free.len == 0)
e->free.len = zone_left
- sizeof(struct tdb_used_record);
len = free_record_len(e->free.len);
break;
case DATA:
......@@ -241,9 +223,9 @@ struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
abort();
}
off += len;
assert(zone_left >= len);
zone_left -= len;
}
/* Must have a free list! */
assert(flist_off);
mem = malloc(off);
/* Now populate our header, cribbing from a real TDB header. */
......@@ -254,24 +236,22 @@ struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
free(tdb->map_ptr);
tdb->map_ptr = mem;
tdb->map_size = off;
tdb->flist_off = flist_off;
for (i = 0; i < layout->num_elems; i++) {
union tdb_layout_elem *e = &layout->elem[i];
switch (e->base.type) {
case ZONE:
set_zone(mem + e->base.off, tdb, &e->zone);
last_zone = &e->zone;
case FREELIST:
set_freelist(mem + e->base.off, tdb, &e->flist);
break;
case FREE:
set_free_record(mem + e->base.off, e->free.len);
break;
case DATA:
set_data_record(mem + e->base.off, tdb, last_zone,
&e->used);
set_data_record(mem + e->base.off, tdb, &e->used);
break;
case HASHTABLE:
set_hashtable(mem + e->base.off, tdb, last_zone,
&e->hashtable);
set_hashtable(mem + e->base.off, tdb, &e->hashtable);
break;
}
}
......@@ -280,12 +260,8 @@ struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
for (i = 0; i < layout->num_elems; i++) {
union tdb_layout_elem *e = &layout->elem[i];
switch (e->base.type) {
case ZONE:
last_zone = &e->zone;
break;
case FREE:
add_to_freetable(tdb, last_zone,
e->base.off, e->free.len);
add_to_freetable(tdb, e->base.off, e->free.len);
break;
case DATA:
add_to_hashtable(tdb, e->base.off, e->used.key);
......
......@@ -3,9 +3,7 @@
#include <ccan/tdb2/private.h>
struct tdb_layout *new_tdb_layout(const char *filename);
void tdb_layout_add_zone(struct tdb_layout *layout,
unsigned int zone_bits,
bool fill_prev);
void tdb_layout_add_freelist(struct tdb_layout *layout);
void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len);
void tdb_layout_add_used(struct tdb_layout *layout,
TDB_DATA key, TDB_DATA data,
......@@ -19,7 +17,7 @@ void tdb_layout_add_hashtable(struct tdb_layout *layout,
struct tdb_context *tdb_layout_get(struct tdb_layout *layout);
enum layout_type {
ZONE, FREE, DATA, HASHTABLE,
FREELIST, FREE, DATA, HASHTABLE,
};
/* Shared by all union members. */
......@@ -28,9 +26,8 @@ struct tle_base {
tdb_off_t off;
};
struct tle_zone {
struct tle_freelist {
struct tle_base base;
unsigned int zone_bits;
};
struct tle_free {
......@@ -54,7 +51,7 @@ struct tle_hashtable {
union tdb_layout_elem {
struct tle_base base;
struct tle_zone zone;
struct tle_freelist flist;
struct tle_free free;
struct tle_used used;
struct tle_hashtable hashtable;
......
......@@ -13,18 +13,16 @@ int main(int argc, char *argv[])
struct tdb_used_record rec;
struct tdb_context tdb = { .log = tap_log_fn, .log_priv = NULL };
plan_tests(64 + 32 + 48*7 + 1);
plan_tests(64 + 32 + 48*6 + 1);
/* We should be able to encode any data value. */
for (i = 0; i < 64; i++)
ok1(set_header(&tdb, &rec, 0, 1ULL << i, 1ULL << i, 0, 0)
== 0);
ok1(set_header(&tdb, &rec, 0, 1ULL << i, 1ULL << i, 0) == 0);
/* And any key and data with < 64 bits between them. */
for (i = 0; i < 32; i++) {
tdb_len_t dlen = 1ULL << (63 - i), klen = 1ULL << i;
ok1(set_header(&tdb, &rec, klen, dlen, klen + dlen, 0, 0)
== 0);
ok1(set_header(&tdb, &rec, klen, dlen, klen + dlen, 0) == 0);
}
/* We should neatly encode all values. */
......@@ -33,15 +31,12 @@ int main(int argc, char *argv[])
uint64_t klen = 1ULL << (i < 16 ? i : 15);
uint64_t dlen = 1ULL << i;
uint64_t xlen = 1ULL << (i < 32 ? i : 31);
uint64_t zbits = 1ULL << (i < 6 ? i : 5);
ok1(set_header(&tdb, &rec, klen, dlen, klen + dlen + xlen, h,
zbits)
ok1(set_header(&tdb, &rec, klen, dlen, klen + dlen + xlen, h)
== 0);
ok1(rec_key_length(&rec) == klen);
ok1(rec_data_length(&rec) == dlen);
ok1(rec_extra_padding(&rec) == xlen);
ok1((uint64_t)rec_hash(&rec) == h);
ok1(rec_zone_bits(&rec) == zbits);
ok1(rec_magic(&rec) == TDB_MAGIC);
}
ok1(tap_log_messages == 0);
......
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
#include "layout.h"
/* Calculate start of zone offset from layout directly. */
static tdb_off_t layout_zone_off(tdb_off_t off, struct tdb_layout *layout)
{
unsigned int i;
/* Every second one is a free entry, so divide by 2 to get zone */
for (i = 0; i < layout->num_elems; i++) {
if (layout->elem[i].base.type != ZONE)
continue;
if (layout->elem[i].base.off
+ (1ULL << layout->elem[i].zone.zone_bits) > off)
return layout->elem[i].base.off;
}
abort();
}
int main(int argc, char *argv[])
{
struct tdb_context *tdb;
struct tdb_layout *layout;
struct free_zone_header zhdr;
tdb_off_t off, step;
unsigned int i;
/* FIXME: Test TDB_CONVERT */
plan_tests(3 + 100);
/* No coalescing can be done due to EOF */
layout = new_tdb_layout(NULL);
tdb_layout_add_zone(layout, INITIAL_ZONE_BITS, false);
tdb_layout_add_zone(layout, INITIAL_ZONE_BITS, true);
tdb_layout_add_zone(layout, INITIAL_ZONE_BITS+1, true);
tdb_layout_add_zone(layout, INITIAL_ZONE_BITS+2, true);
tdb_layout_add_zone(layout, INITIAL_ZONE_BITS+2, true);
tdb = tdb_layout_get(layout);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Last zone should get right zone. */
ok1(last_zone(tdb, &zhdr)
== layout->elem[layout->num_elems-1].base.off);
ok1(zhdr.zone_bits == INITIAL_ZONE_BITS+2);
off = sizeof(struct tdb_header);
step = (tdb->map_size - 1 - off) / 100;
for (i = 0; i < 100; i++, off += step) {
ok1(off_to_zone(tdb, off, &zhdr) == layout_zone_off(off, layout));
}
return exit_status();
}
......@@ -16,7 +16,7 @@ int main(int argc, char *argv[])
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
plan_tests(sizeof(flags) / sizeof(flags[0]) * 18 + 1);
plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-expand.tdb", flags[i],
......@@ -25,47 +25,15 @@ int main(int argc, char *argv[])
if (!tdb)
continue;
/* First expand. Should not fill zone. */
val = tdb->map_size - sizeof(struct tdb_header);
val = tdb->map_size;
ok1(tdb_expand(tdb, 1) == 0);
ok1(tdb->map_size < sizeof(struct tdb_header)
+ (1 << INITIAL_ZONE_BITS));
ok1(tdb->map_size >= val + 1 * TDB_EXTENSION_FACTOR);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Fill zone. */
val = (1<<INITIAL_ZONE_BITS)
- sizeof(struct tdb_used_record)
- (tdb->map_size - sizeof(struct tdb_header));
ok1(tdb_expand(tdb, val) == 0);
ok1(tdb->map_size == sizeof(struct tdb_header)
+ (1 << INITIAL_ZONE_BITS));
val = tdb->map_size;
ok1(tdb_expand(tdb, 1024) == 0);
ok1(tdb->map_size >= val + 1024 * TDB_EXTENSION_FACTOR);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Second expand, adds another zone of same size. */
ok1(tdb_expand(tdb, 4 << INITIAL_ZONE_BITS) == 0);
ok1(tdb->map_size ==
(2<<INITIAL_ZONE_BITS) + sizeof(struct tdb_header));
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Large expand now will double file. */
ok1(tdb_expand(tdb, 4 << INITIAL_ZONE_BITS) == 0);
ok1(tdb->map_size ==
(4<<INITIAL_ZONE_BITS) + sizeof(struct tdb_header));
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* And again? */
ok1(tdb_expand(tdb, 4 << INITIAL_ZONE_BITS) == 0);
ok1(tdb->map_size ==
(8<<INITIAL_ZONE_BITS) + sizeof(struct tdb_header));
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Below comfort level, won't fill zone. */
ok1(tdb_expand(tdb,
((3 << INITIAL_ZONE_BITS)
>> TDB_COMFORT_FACTOR_BITS)
- sizeof(struct tdb_used_record)) == 0);
ok1(tdb->map_size < (12<<INITIAL_ZONE_BITS)
+ sizeof(struct tdb_header));
tdb_close(tdb);
}
......
......@@ -21,16 +21,15 @@ static tdb_len_t free_record_length(struct tdb_context *tdb, tdb_off_t off)
int main(int argc, char *argv[])
{
tdb_off_t b_off, zone_off;
tdb_off_t b_off;
struct tdb_context *tdb;
struct tdb_layout *layout;
struct tdb_data data, key;
tdb_len_t len;
unsigned int zone_bits = 16;
/* FIXME: Test TDB_CONVERT */
plan_tests(45);
plan_tests(38);
data.dptr = (void *)"world";
data.dsize = 5;
key.dptr = (void *)"hello";
......@@ -38,20 +37,18 @@ int main(int argc, char *argv[])
/* No coalescing can be done due to EOF */
layout = new_tdb_layout(NULL);
tdb_layout_add_zone(layout, zone_bits, false);
tdb_layout_add_freelist(layout);
len = 1024;
tdb_layout_add_free(layout, len);
tdb = tdb_layout_get(layout);
zone_off = layout->elem[0].base.off;
ok1(tdb_check(tdb, NULL, NULL) == 0);
ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
/* Figure out which bucket free entry is. */
b_off = bucket_off(zone_off, size_to_bucket(zone_bits, len));
b_off = bucket_off(tdb->flist_off, size_to_bucket(len));
/* Lock and fail to coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off,
b_off, len) == 0);
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, len) == 0);
tdb_unlock_free_bucket(tdb, b_off);
ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
ok1(tdb_check(tdb, NULL, NULL) == 0);
......@@ -59,20 +56,18 @@ int main(int argc, char *argv[])
/* No coalescing can be done due to used record */
layout = new_tdb_layout(NULL);
tdb_layout_add_zone(layout, zone_bits, false);
tdb_layout_add_freelist(layout);
tdb_layout_add_free(layout, 1024);
tdb_layout_add_used(layout, key, data, 6);
tdb = tdb_layout_get(layout);
zone_off = layout->elem[0].base.off;
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Figure out which bucket free entry is. */
b_off = bucket_off(zone_off, size_to_bucket(zone_bits, 1024));
b_off = bucket_off(tdb->flist_off, size_to_bucket(1024));
/* Lock and fail to coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off,
b_off, 1024) == 0);
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024) == 0);
tdb_unlock_free_bucket(tdb, b_off);
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(tdb_check(tdb, NULL, NULL) == 0);
......@@ -80,21 +75,19 @@ int main(int argc, char *argv[])
/* Coalescing can be done due to two free records, then EOF */
layout = new_tdb_layout(NULL);
tdb_layout_add_zone(layout, zone_bits, false);
tdb_layout_add_freelist(layout);
tdb_layout_add_free(layout, 1024);
tdb_layout_add_free(layout, 2048);
tdb = tdb_layout_get(layout);
zone_off = layout->elem[0].base.off;
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(free_record_length(tdb, layout->elem[2].base.off) == 2048);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Figure out which bucket (first) free entry is. */
b_off = bucket_off(zone_off, size_to_bucket(zone_bits, 1024));
b_off = bucket_off(tdb->flist_off, size_to_bucket(1024));
/* Lock and coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off,
b_off, 1024) == 1);
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024) == 1);
ok1(!tdb_has_locks(tdb));
ok1(free_record_length(tdb, layout->elem[1].base.off)
== 1024 + sizeof(struct tdb_used_record) + 2048);
......@@ -103,22 +96,20 @@ int main(int argc, char *argv[])
/* Coalescing can be done due to two free records, then data */
layout = new_tdb_layout(NULL);
tdb_layout_add_zone(layout, zone_bits, false);
tdb_layout_add_freelist(layout);
tdb_layout_add_free(layout, 1024);
tdb_layout_add_free(layout, 512);
tdb_layout_add_used(layout, key, data, 6);
tdb = tdb_layout_get(layout);
zone_off = layout->elem[0].base.off;
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(free_record_length(tdb, layout->elem[2].base.off) == 512);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Figure out which bucket free entry is. */
b_off = bucket_off(zone_off, size_to_bucket(zone_bits, 1024));
b_off = bucket_off(tdb->flist_off, size_to_bucket(1024));
/* Lock and coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off,
b_off, 1024) == 1);
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024) == 1);
ok1(!tdb_has_locks(tdb));
ok1(free_record_length(tdb, layout->elem[1].base.off)
== 1024 + sizeof(struct tdb_used_record) + 512);
......@@ -127,23 +118,21 @@ int main(int argc, char *argv[])
/* Coalescing can be done due to three free records, then EOF */
layout = new_tdb_layout(NULL);
tdb_layout_add_zone(layout, zone_bits, false);
tdb_layout_add_freelist(layout);
tdb_layout_add_free(layout, 1024);
tdb_layout_add_free(layout, 512);
tdb_layout_add_free(layout, 256);
tdb = tdb_layout_get(layout);
zone_off = layout->elem[0].base.off;
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(free_record_length(tdb, layout->elem[2].base.off) == 512);
ok1(free_record_length(tdb, layout->elem[3].base.off) == 256);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Figure out which bucket free entry is. */
b_off = bucket_off(zone_off, size_to_bucket(zone_bits, 1024));
b_off = bucket_off(tdb->flist_off, size_to_bucket(1024));
/* Lock and coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off,
b_off, 1024) == 1);
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024) == 1);
ok1(!tdb_has_locks(tdb));
ok1(free_record_length(tdb, layout->elem[1].base.off)
== 1024 + sizeof(struct tdb_used_record) + 512
......@@ -151,28 +140,6 @@ int main(int argc, char *argv[])
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
/* Coalescing across two zones isn't possible. */
layout = new_tdb_layout(NULL);
tdb_layout_add_zone(layout, zone_bits, false);
tdb_layout_add_zone(layout, zone_bits, true);
tdb = tdb_layout_get(layout);
zone_off = layout->elem[0].base.off;
len = layout->elem[1].free.len;
ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Figure out which list free entry is. */
b_off = bucket_off(zone_off, size_to_bucket(zone_bits, len));
/* Lock and coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off,
b_off, len) == 0);
tdb_unlock_free_bucket(tdb, b_off);
ok1(!tdb_has_locks(tdb));
ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
ok1(tap_log_messages == 0);
return exit_status();
}
......@@ -169,7 +169,7 @@ int main(int argc, char *argv[])
/* Simple delete should work. */
ok1(delete_from_hash(tdb, &h) == 0);
ok1(add_free_record(tdb, rec_zone_bits(&rec), new_off,
ok1(add_free_record(tdb, new_off,
sizeof(struct tdb_used_record)
+ rec_key_length(&rec)
+ rec_data_length(&rec)
......
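For reference, here is a minimal, hypothetical sketch of the simplified allocator call exercised in the hunk above. It assumes add_free_record() returns 0 on success, like the other tdb2 internals these tests call, and it abbreviates the length expression, whose tail is elided in the diff:

/* Sketch: return the space of a just-unlinked record to the free list.
 * With zones gone, only the record's offset and its total on-disk
 * length are needed (return convention assumed; length abbreviated). */
if (add_free_record(tdb, new_off,
                    sizeof(struct tdb_used_record)
                    + rec_key_length(&rec)
                    + rec_data_length(&rec)) != 0)
        err(1, "add_free_record failed");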
......@@ -8,6 +8,23 @@
#include <err.h>
#include "logging.h"
static bool empty_freelist(struct tdb_context *tdb)
{
struct tdb_freelist free;
unsigned int i;
/* Is the free list completely empty? */
if (tdb_read_convert(tdb, tdb->flist_off, &free, sizeof(free)) != 0)
abort();
for (i = 0; i < sizeof(free.buckets)/sizeof(free.buckets[0]); i++) {
if (free.buckets[i])
return false;
}
return true;
}
int main(int argc, char *argv[])
{
unsigned int i, j;
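The empty_freelist() helper above reads the entire free-list record in a single tdb_read_convert(). As a rough sketch, the layout it implies looks something like the following; only the buckets array is visible in this hunk, so the header member, the bucket-count constant, and the exact set of fields are assumptions:

/* Hypothetical sketch of the single free list's on-disk shape:
 * a normal record header followed by one chain head per size bucket.
 * Only `buckets` is confirmed by the helper above; `hdr` and
 * TDB_FREE_BUCKETS are assumed names. */
struct tdb_freelist {
        struct tdb_used_record hdr;            /* assumed header member */
        tdb_off_t buckets[TDB_FREE_BUCKETS];   /* per-size free chain heads */
};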
......@@ -16,12 +33,12 @@ int main(int argc, char *argv[])
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
plan_tests(sizeof(flags) / sizeof(flags[0]) * 5 + 1);
plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb_off_t free[BUCKETS_FOR_ZONE(INITIAL_ZONE_BITS) + 1];
bool all_empty;
TDB_DATA k, d;
TDB_DATA k;
uint64_t size;
bool was_empty = false;
k.dptr = (void *)&j;
k.dsize = sizeof(j);
......@@ -32,38 +49,23 @@ int main(int argc, char *argv[])
if (!tdb)
continue;
/* We don't want the hash to expand, so we use one alloc to
* chew up over most of the space first. */
j = -1;
d.dsize = (1 << INITIAL_ZONE_BITS) - 500;
d.dptr = malloc(d.dsize);
ok1(tdb_store(tdb, k, d, TDB_INSERT) == 0);
ok1(tdb->map_size == sizeof(struct tdb_header)
+ (1 << INITIAL_ZONE_BITS));
ok1(empty_freelist(tdb));
/* Create some free space. */
ok1(tdb_expand(tdb, 1) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
ok1(!empty_freelist(tdb));
/* Insert minimal-length records until we add a zone. */
for (j = 0;
tdb->map_size == sizeof(struct tdb_header)
+ (1 << INITIAL_ZONE_BITS);
j++) {
size = tdb->map_size;
/* Insert minimal-length records until we expand. */
for (j = 0; tdb->map_size == size; j++) {
was_empty = empty_freelist(tdb);
if (tdb_store(tdb, k, k, TDB_INSERT) != 0)
err(1, "Failed to store record %i", j);
}
/* Now, free list should be completely exhausted in zone 0 */
ok1(tdb_read_convert(tdb,
sizeof(struct tdb_header)
+ sizeof(struct free_zone_header),
&free, sizeof(free)) == 0);
all_empty = true;
for (j = 0; j < sizeof(free)/sizeof(free[0]); j++) {
if (free[j]) {
diag("Free bucket %i not empty", j);
all_empty = false;
}
}
ok1(all_empty);
/* Would have been empty before expansion, but no longer. */
ok1(was_empty);
ok1(!empty_freelist(tdb));
tdb_close(tdb);
}
......
......@@ -44,15 +44,21 @@ int main(int argc, char *argv[])
struct trav_data td;
TDB_DATA k, k2;
struct tdb_context *tdb;
union tdb_attribute seed_attr;
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
seed_attr.base.attr = TDB_ATTRIBUTE_SEED;
seed_attr.base.next = &tap_log_attr;
seed_attr.seed.seed = 6334326220117065685ULL;
plan_tests(sizeof(flags) / sizeof(flags[0])
* (NUM_RECORDS*4 + (NUM_RECORDS-1)*2 + 20) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-traverse.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
O_RDWR|O_CREAT|O_TRUNC, 0600, &seed_attr);
ok1(tdb);
if (!tdb)
continue;
......
......@@ -19,7 +19,7 @@ int main(int argc, char *argv[])
struct tdb_data data = { (unsigned char *)&j, sizeof(j) };
char *summary;
plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 17) + 1);
plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 4) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-summary.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
......@@ -42,19 +42,6 @@ int main(int argc, char *argv[])
ok1(strstr(summary, "Number of records: 500\n"));
ok1(strstr(summary, "Smallest/average/largest keys: 4/4/4\n"));
ok1(strstr(summary, "Smallest/average/largest data: 0/2/4\n"));
ok1(strstr(summary, "Free bucket 16:"));
ok1(strstr(summary, "Free bucket 24:"));
ok1(strstr(summary, "Free bucket 32:"));
ok1(strstr(summary, "Free bucket 40:"));
ok1(strstr(summary, "Free bucket 48:"));
ok1(strstr(summary, "Free bucket 56:"));
ok1(strstr(summary, "Free bucket 64:"));
ok1(strstr(summary, "Free bucket 72:"));
ok1(strstr(summary, "Free bucket 80:"));
ok1(strstr(summary, "Free bucket 88-136:"));
ok1(strstr(summary, "Free bucket 144-264:"));
ok1(strstr(summary, "Free bucket 272-520:"));
ok1(strstr(summary, "Free bucket 528-1032:"));
if (j == TDB_SUMMARY_HISTOGRAMS)
ok1(strstr(summary, "|")
&& strstr(summary, "*"));
......