Commit 2523e67f authored by Rusty Russell's avatar Rusty Russell

tdb2: hash chaining

If we get enough hash collisions, we can run out of hash bits; this almost
certainly is caused by a deliberate attempt to break the tdb (hash bombing).

Implement chained records for this case; they're slow but will keep the
rest of the database functioning.
parent 539f1af0
...@@ -81,6 +81,55 @@ static bool check_hash_tree(struct tdb_context *tdb, ...@@ -81,6 +81,55 @@ static bool check_hash_tree(struct tdb_context *tdb,
int (*check)(TDB_DATA, TDB_DATA, void *), int (*check)(TDB_DATA, TDB_DATA, void *),
void *private_data); void *private_data);
static bool check_hash_chain(struct tdb_context *tdb,
tdb_off_t off,
uint64_t hash,
tdb_off_t used[],
size_t num_used,
size_t *num_found,
int (*check)(TDB_DATA, TDB_DATA, void *),
void *private_data)
{
struct tdb_used_record rec;
if (tdb_read_convert(tdb, off, &rec, sizeof(rec)) == -1)
return false;
if (rec_data_length(&rec) != sizeof(struct tdb_chain)) {
tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
"tdb_check: Bad hash chain length %llu vs %zu",
(long long)rec_data_length(&rec),
sizeof(struct tdb_chain));
return false;
}
if (rec_key_length(&rec) != 0) {
tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
"tdb_check: Bad hash chain key length %llu",
(long long)rec_key_length(&rec));
return false;
}
if (rec_hash(&rec) != 2) {
tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
"tdb_check: Bad hash chain hash value %llu",
(long long)rec_hash(&rec));
return false;
}
off += sizeof(rec);
if (!check_hash_tree(tdb, off, 0, hash, 64,
used, num_used, num_found, check, private_data))
return false;
off = tdb_read_off(tdb, off + offsetof(struct tdb_chain, next));
if (off == TDB_OFF_ERR)
return false;
if (off == 0)
return true;
(*num_found)++;
return check_hash_chain(tdb, off, hash, used, num_used, num_found,
check, private_data);
}
static bool check_hash_record(struct tdb_context *tdb, static bool check_hash_record(struct tdb_context *tdb,
tdb_off_t off, tdb_off_t off,
uint64_t hprefix, uint64_t hprefix,
...@@ -93,6 +142,10 @@ static bool check_hash_record(struct tdb_context *tdb, ...@@ -93,6 +142,10 @@ static bool check_hash_record(struct tdb_context *tdb,
{ {
struct tdb_used_record rec; struct tdb_used_record rec;
if (hprefix_bits >= 64)
return check_hash_chain(tdb, off, hprefix, used, num_used,
num_found, check, private_data);
if (tdb_read_convert(tdb, off, &rec, sizeof(rec)) == -1) if (tdb_read_convert(tdb, off, &rec, sizeof(rec)) == -1)
return false; return false;
...@@ -183,6 +236,32 @@ static bool check_hash_tree(struct tdb_context *tdb, ...@@ -183,6 +236,32 @@ static bool check_hash_tree(struct tdb_context *tdb,
*p ^= 1; *p ^= 1;
(*num_found)++; (*num_found)++;
if (hprefix_bits == 64) {
/* Chained entries are unordered. */
if (is_subhash(group[b])) {
tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_DEBUG_ERROR,
"tdb_check: Invalid chain"
" entry subhash");
goto fail;
}
h = hash_record(tdb, off);
if (h != hprefix) {
tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_DEBUG_ERROR,
"check: bad hash chain"
" placement"
" 0x%llx vs 0x%llx",
(long long)h,
(long long)hprefix);
goto fail;
}
if (tdb_read_convert(tdb, off, &rec,
sizeof(rec)))
goto fail;
goto check;
}
if (is_subhash(group[b])) { if (is_subhash(group[b])) {
uint64_t subprefix; uint64_t subprefix;
subprefix = (hprefix subprefix = (hprefix
...@@ -269,6 +348,7 @@ static bool check_hash_tree(struct tdb_context *tdb, ...@@ -269,6 +348,7 @@ static bool check_hash_tree(struct tdb_context *tdb,
goto fail; goto fail;
} }
check:
if (check) { if (check) {
TDB_DATA key, data; TDB_DATA key, data;
key.dsize = rec_key_length(&rec); key.dsize = rec_key_length(&rec);
......
...@@ -78,6 +78,30 @@ static uint32_t use_bits(struct hash_info *h, unsigned num) ...@@ -78,6 +78,30 @@ static uint32_t use_bits(struct hash_info *h, unsigned num)
return bits(h->h, 64 - h->hash_used, num); return bits(h->h, 64 - h->hash_used, num);
} }
static bool key_matches(struct tdb_context *tdb,
const struct tdb_used_record *rec,
tdb_off_t off,
const struct tdb_data *key)
{
bool ret = false;
const char *rkey;
if (rec_key_length(rec) != key->dsize) {
add_stat(tdb, compare_wrong_keylen, 1);
return ret;
}
rkey = tdb_access_read(tdb, off + sizeof(*rec), key->dsize, false);
if (!rkey)
return ret;
if (memcmp(rkey, key->dptr, key->dsize) == 0)
ret = true;
else
add_stat(tdb, compare_wrong_keycmp, 1);
tdb_access_release(tdb, rkey);
return ret;
}
/* Does entry match? */ /* Does entry match? */
static bool match(struct tdb_context *tdb, static bool match(struct tdb_context *tdb,
struct hash_info *h, struct hash_info *h,
...@@ -85,15 +109,13 @@ static bool match(struct tdb_context *tdb, ...@@ -85,15 +109,13 @@ static bool match(struct tdb_context *tdb,
tdb_off_t val, tdb_off_t val,
struct tdb_used_record *rec) struct tdb_used_record *rec)
{ {
bool ret = false;
const unsigned char *rkey;
tdb_off_t off; tdb_off_t off;
add_stat(tdb, compares, 1); add_stat(tdb, compares, 1);
/* Desired bucket must match. */ /* Desired bucket must match. */
if (h->home_bucket != (val & TDB_OFF_HASH_GROUP_MASK)) { if (h->home_bucket != (val & TDB_OFF_HASH_GROUP_MASK)) {
add_stat(tdb, compare_wrong_bucket, 1); add_stat(tdb, compare_wrong_bucket, 1);
return ret; return false;
} }
/* Top bits of offset == next bits of hash. */ /* Top bits of offset == next bits of hash. */
...@@ -101,32 +123,19 @@ static bool match(struct tdb_context *tdb, ...@@ -101,32 +123,19 @@ static bool match(struct tdb_context *tdb,
!= bits(h->h, 64 - h->hash_used - TDB_OFF_UPPER_STEAL_EXTRA, != bits(h->h, 64 - h->hash_used - TDB_OFF_UPPER_STEAL_EXTRA,
TDB_OFF_UPPER_STEAL_EXTRA)) { TDB_OFF_UPPER_STEAL_EXTRA)) {
add_stat(tdb, compare_wrong_offsetbits, 1); add_stat(tdb, compare_wrong_offsetbits, 1);
return ret; return false;
} }
off = val & TDB_OFF_MASK; off = val & TDB_OFF_MASK;
if (tdb_read_convert(tdb, off, rec, sizeof(*rec)) == -1) if (tdb_read_convert(tdb, off, rec, sizeof(*rec)) == -1)
return ret; return false;
if (rec_key_length(rec) != key->dsize) {
add_stat(tdb, compare_wrong_keylen, 1);
return ret;
}
if ((h->h & ((1 << 11)-1)) != rec_hash(rec)) { if ((h->h & ((1 << 11)-1)) != rec_hash(rec)) {
add_stat(tdb, compare_wrong_rechash, 1); add_stat(tdb, compare_wrong_rechash, 1);
return false; return false;
} }
rkey = tdb_access_read(tdb, off + sizeof(*rec), key->dsize, false); return key_matches(tdb, rec, off, key);
if (!rkey)
return ret;
if (memcmp(rkey, key->dptr, key->dsize) == 0)
ret = true;
else
add_stat(tdb, compare_wrong_keycmp, 1);
tdb_access_release(tdb, rkey);
return ret;
} }
static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned bucket) static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned bucket)
...@@ -147,6 +156,65 @@ static tdb_off_t hlock_range(tdb_off_t group, tdb_off_t *size) ...@@ -147,6 +156,65 @@ static tdb_off_t hlock_range(tdb_off_t group, tdb_off_t *size)
return group << (64 - (TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS)); return group << (64 - (TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS));
} }
static tdb_off_t COLD find_in_chain(struct tdb_context *tdb,
struct tdb_data key,
tdb_off_t chain,
struct hash_info *h,
struct tdb_used_record *rec,
struct traverse_info *tinfo)
{
tdb_off_t off, next;
/* In case nothing is free, we set these to zero. */
h->home_bucket = h->found_bucket = 0;
for (off = chain; off; off = next) {
unsigned int i;
h->group_start = off;
if (tdb_read_convert(tdb, off, h->group, sizeof(h->group)))
return TDB_OFF_ERR;
for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
tdb_off_t recoff;
if (!h->group[i]) {
/* Remember this empty bucket. */
h->home_bucket = h->found_bucket = i;
continue;
}
/* We can insert extra bits via add_to_hash
* empty bucket logic. */
recoff = h->group[i] & TDB_OFF_MASK;
if (tdb_read_convert(tdb, recoff, rec, sizeof(*rec)))
return TDB_OFF_ERR;
if (key_matches(tdb, rec, recoff, &key)) {
h->home_bucket = h->found_bucket = i;
if (tinfo) {
tinfo->levels[tinfo->num_levels]
.hashtable = off;
tinfo->levels[tinfo->num_levels]
.total_buckets
= 1 << TDB_HASH_GROUP_BITS;
tinfo->levels[tinfo->num_levels].entry
= i;
tinfo->num_levels++;
}
return recoff;
}
}
next = tdb_read_off(tdb, off
+ offsetof(struct tdb_chain, next));
if (next == TDB_OFF_ERR)
return TDB_OFF_ERR;
if (next)
next += sizeof(struct tdb_used_record);
}
return 0;
}
/* This is the core routine which searches the hashtable for an entry. /* This is the core routine which searches the hashtable for an entry.
* On error, no locks are held and TDB_OFF_ERR is returned. * On error, no locks are held and TDB_OFF_ERR is returned.
* Otherwise, hinfo is filled in (and the optional tinfo). * Otherwise, hinfo is filled in (and the optional tinfo).
...@@ -182,7 +250,7 @@ tdb_off_t find_and_lock(struct tdb_context *tdb, ...@@ -182,7 +250,7 @@ tdb_off_t find_and_lock(struct tdb_context *tdb,
tinfo->levels[0].total_buckets = 1 << TDB_HASH_GROUP_BITS; tinfo->levels[0].total_buckets = 1 << TDB_HASH_GROUP_BITS;
} }
while (likely(h->hash_used < 64)) { while (h->hash_used <= 64) {
/* Read in the hash group. */ /* Read in the hash group. */
h->group_start = hashtable h->group_start = hashtable
+ group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS); + group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
...@@ -239,8 +307,7 @@ tdb_off_t find_and_lock(struct tdb_context *tdb, ...@@ -239,8 +307,7 @@ tdb_off_t find_and_lock(struct tdb_context *tdb,
return 0; return 0;
} }
/* FIXME: We hit the bottom. Chain! */ return find_in_chain(tdb, key, hashtable, h, rec, tinfo);
abort();
fail: fail:
tdb_unlock_hashes(tdb, h->hlock_start, h->hlock_range, ltype); tdb_unlock_hashes(tdb, h->hlock_start, h->hlock_range, ltype);
...@@ -326,6 +393,41 @@ int replace_in_hash(struct tdb_context *tdb, ...@@ -326,6 +393,41 @@ int replace_in_hash(struct tdb_context *tdb,
encode_offset(new_off, h)); encode_offset(new_off, h));
} }
/* We slot in anywhere that's empty in the chain. */
static int COLD add_to_chain(struct tdb_context *tdb,
tdb_off_t subhash,
tdb_off_t new_off)
{
size_t entry = tdb_find_zero_off(tdb, subhash, 1<<TDB_HASH_GROUP_BITS);
if (entry == 1 << TDB_HASH_GROUP_BITS) {
tdb_off_t next;
next = tdb_read_off(tdb, subhash
+ offsetof(struct tdb_chain, next));
if (next == TDB_OFF_ERR)
return -1;
if (!next) {
next = alloc(tdb, 0, sizeof(struct tdb_chain), 2,
false);
if (next == TDB_OFF_ERR)
return -1;
if (zero_out(tdb, next+sizeof(struct tdb_used_record),
sizeof(struct tdb_chain)))
return -1;
if (tdb_write_off(tdb, subhash
+ offsetof(struct tdb_chain, next),
next) != 0)
return -1;
}
return add_to_chain(tdb, next, new_off);
}
return tdb_write_off(tdb, subhash + entry * sizeof(tdb_off_t),
new_off);
}
/* Add into a newly created subhash. */ /* Add into a newly created subhash. */
static int add_to_subhash(struct tdb_context *tdb, tdb_off_t subhash, static int add_to_subhash(struct tdb_context *tdb, tdb_off_t subhash,
unsigned hash_used, tdb_off_t val) unsigned hash_used, tdb_off_t val)
...@@ -336,13 +438,12 @@ static int add_to_subhash(struct tdb_context *tdb, tdb_off_t subhash, ...@@ -336,13 +438,12 @@ static int add_to_subhash(struct tdb_context *tdb, tdb_off_t subhash,
h.hash_used = hash_used; h.hash_used = hash_used;
/* FIXME chain if hash_used == 64 */
if (hash_used + TDB_SUBLEVEL_HASH_BITS > 64) if (hash_used + TDB_SUBLEVEL_HASH_BITS > 64)
abort(); return add_to_chain(tdb, subhash, off);
h.h = hash_record(tdb, off); h.h = hash_record(tdb, off);
gnum = use_bits(&h, TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS); gnum = use_bits(&h, TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS);
h.group_start = subhash + sizeof(struct tdb_used_record) h.group_start = subhash
+ gnum * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS); + gnum * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
h.home_bucket = use_bits(&h, TDB_HASH_GROUP_BITS); h.home_bucket = use_bits(&h, TDB_HASH_GROUP_BITS);
...@@ -356,21 +457,29 @@ static int add_to_subhash(struct tdb_context *tdb, tdb_off_t subhash, ...@@ -356,21 +457,29 @@ static int add_to_subhash(struct tdb_context *tdb, tdb_off_t subhash,
static int expand_group(struct tdb_context *tdb, struct hash_info *h) static int expand_group(struct tdb_context *tdb, struct hash_info *h)
{ {
unsigned bucket, num_vals, i; unsigned bucket, num_vals, i, hash;
size_t subsize;
tdb_off_t subhash; tdb_off_t subhash;
tdb_off_t vals[1 << TDB_HASH_GROUP_BITS]; tdb_off_t vals[1 << TDB_HASH_GROUP_BITS];
/* Attach new empty subhash under fullest bucket. */ /* Attach new empty subhash under fullest bucket. */
bucket = fullest_bucket(tdb, h->group, h->home_bucket); bucket = fullest_bucket(tdb, h->group, h->home_bucket);
subhash = alloc(tdb, 0, sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS, if (h->hash_used == 64) {
0, false); add_stat(tdb, alloc_chain, 1);
subsize = sizeof(struct tdb_chain);
hash = 2;
} else {
add_stat(tdb, alloc_subhash, 1);
subsize = (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS);
hash = 0;
}
subhash = alloc(tdb, 0, subsize, hash, false);
if (subhash == TDB_OFF_ERR) if (subhash == TDB_OFF_ERR)
return -1; return -1;
add_stat(tdb, alloc_subhash, 1); if (zero_out(tdb, subhash + sizeof(struct tdb_used_record), subsize))
if (zero_out(tdb, subhash + sizeof(struct tdb_used_record),
sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) == -1)
return -1; return -1;
/* Remove any which are destined for bucket or are in wrong place. */ /* Remove any which are destined for bucket or are in wrong place. */
...@@ -390,6 +499,9 @@ static int expand_group(struct tdb_context *tdb, struct hash_info *h) ...@@ -390,6 +499,9 @@ static int expand_group(struct tdb_context *tdb, struct hash_info *h)
/* Overwrite expanded bucket with subhash pointer. */ /* Overwrite expanded bucket with subhash pointer. */
h->group[bucket] = subhash | (1ULL << TDB_OFF_UPPER_STEAL_SUBHASH_BIT); h->group[bucket] = subhash | (1ULL << TDB_OFF_UPPER_STEAL_SUBHASH_BIT);
/* Point to actual contents of record. */
subhash += sizeof(struct tdb_used_record);
/* Put values back. */ /* Put values back. */
for (i = 0; i < num_vals; i++) { for (i = 0; i < num_vals; i++) {
unsigned this_bucket = vals[i] & TDB_OFF_HASH_GROUP_MASK; unsigned this_bucket = vals[i] & TDB_OFF_HASH_GROUP_MASK;
...@@ -444,10 +556,6 @@ int delete_from_hash(struct tdb_context *tdb, struct hash_info *h) ...@@ -444,10 +556,6 @@ int delete_from_hash(struct tdb_context *tdb, struct hash_info *h)
int add_to_hash(struct tdb_context *tdb, struct hash_info *h, tdb_off_t new_off) int add_to_hash(struct tdb_context *tdb, struct hash_info *h, tdb_off_t new_off)
{ {
/* FIXME: chain! */
if (h->hash_used >= 64)
abort();
/* We hit an empty bucket during search? That's where it goes. */ /* We hit an empty bucket during search? That's where it goes. */
if (!h->group[h->found_bucket]) { if (!h->group[h->found_bucket]) {
h->group[h->found_bucket] = encode_offset(new_off, h); h->group[h->found_bucket] = encode_offset(new_off, h);
...@@ -456,6 +564,9 @@ int add_to_hash(struct tdb_context *tdb, struct hash_info *h, tdb_off_t new_off) ...@@ -456,6 +564,9 @@ int add_to_hash(struct tdb_context *tdb, struct hash_info *h, tdb_off_t new_off)
h->group, sizeof(h->group)); h->group, sizeof(h->group));
} }
if (h->hash_used > 64)
return add_to_chain(tdb, h->group_start, new_off);
/* We're full. Expand. */ /* We're full. Expand. */
if (expand_group(tdb, h) == -1) if (expand_group(tdb, h) == -1)
return -1; return -1;
...@@ -534,7 +645,11 @@ again: ...@@ -534,7 +645,11 @@ again:
tlevel++; tlevel++;
tlevel->hashtable = off + sizeof(struct tdb_used_record); tlevel->hashtable = off + sizeof(struct tdb_used_record);
tlevel->entry = 0; tlevel->entry = 0;
tlevel->total_buckets = (1 << TDB_SUBLEVEL_HASH_BITS); /* Next level is a chain? */
if (unlikely(tinfo->num_levels == TDB_MAX_LEVELS + 1))
tlevel->total_buckets = (1 << TDB_HASH_GROUP_BITS);
else
tlevel->total_buckets = (1 << TDB_SUBLEVEL_HASH_BITS);
goto again; goto again;
} }
...@@ -542,6 +657,20 @@ again: ...@@ -542,6 +657,20 @@ again:
if (tinfo->num_levels == 1) if (tinfo->num_levels == 1)
return 0; return 0;
/* Handle chained entries. */
if (unlikely(tinfo->num_levels == TDB_MAX_LEVELS + 1)) {
tlevel->hashtable = tdb_read_off(tdb, tlevel->hashtable
+ offsetof(struct tdb_chain,
next));
if (tlevel->hashtable == TDB_OFF_ERR)
return TDB_OFF_ERR;
if (tlevel->hashtable) {
tlevel->hashtable += sizeof(struct tdb_used_record);
tlevel->entry = 0;
goto again;
}
}
/* Go back up and keep searching. */ /* Go back up and keep searching. */
tinfo->num_levels--; tinfo->num_levels--;
tlevel--; tlevel--;
......
...@@ -91,6 +91,8 @@ typedef uint64_t tdb_off_t; ...@@ -91,6 +91,8 @@ typedef uint64_t tdb_off_t;
#define TDB_SUBLEVEL_HASH_BITS 6 #define TDB_SUBLEVEL_HASH_BITS 6
/* And 8 entries in each group, ie 8 groups per sublevel. */ /* And 8 entries in each group, ie 8 groups per sublevel. */
#define TDB_HASH_GROUP_BITS 3 #define TDB_HASH_GROUP_BITS 3
/* This is currently 10: beyond this we chain. */
#define TDB_MAX_LEVELS (1+(64-TDB_TOPLEVEL_HASH_BITS) / TDB_SUBLEVEL_HASH_BITS)
/* Extend file by least 100 times larger than needed. */ /* Extend file by least 100 times larger than needed. */
#define TDB_EXTENSION_FACTOR 100 #define TDB_EXTENSION_FACTOR 100
...@@ -212,6 +214,12 @@ struct tdb_recovery_record { ...@@ -212,6 +214,12 @@ struct tdb_recovery_record {
uint64_t eof; uint64_t eof;
}; };
/* If we bottom out of the subhashes, we chain. */
struct tdb_chain {
tdb_off_t rec[1 << TDB_HASH_GROUP_BITS];
tdb_off_t next;
};
/* this is stored at the front of every database */ /* this is stored at the front of every database */
struct tdb_header { struct tdb_header {
char magic_food[64]; /* for /etc/magic */ char magic_food[64]; /* for /etc/magic */
...@@ -259,7 +267,7 @@ struct traverse_info { ...@@ -259,7 +267,7 @@ struct traverse_info {
/* We ignore groups here, and treat it as a big array. */ /* We ignore groups here, and treat it as a big array. */
unsigned entry; unsigned entry;
unsigned int total_buckets; unsigned int total_buckets;
} levels[64 / TDB_SUBLEVEL_HASH_BITS]; } levels[TDB_MAX_LEVELS + 1];
unsigned int num_levels; unsigned int num_levels;
unsigned int toplevel_group; unsigned int toplevel_group;
/* This makes delete-everything-inside-traverse work as expected. */ /* This makes delete-everything-inside-traverse work as expected. */
......
...@@ -43,7 +43,8 @@ static bool summarize(struct tdb_context *tdb, ...@@ -43,7 +43,8 @@ static bool summarize(struct tdb_context *tdb,
struct tally *data, struct tally *data,
struct tally *extra, struct tally *extra,
struct tally *uncoal, struct tally *uncoal,
struct tally *buckets) struct tally *buckets,
struct tally *chains)
{ {
tdb_off_t off; tdb_off_t off;
tdb_len_t len; tdb_len_t len;
...@@ -83,7 +84,7 @@ static bool summarize(struct tdb_context *tdb, ...@@ -83,7 +84,7 @@ static bool summarize(struct tdb_context *tdb,
+ rec_extra_padding(&p->u); + rec_extra_padding(&p->u);
/* FIXME: Use different magic for hashes, flists. */ /* FIXME: Use different magic for hashes, flists. */
if (!rec_key_length(&p->u) && rec_hash(&p->u) < 2) { if (!rec_key_length(&p->u) && rec_hash(&p->u) < 3) {
if (rec_hash(&p->u) == 0) { if (rec_hash(&p->u) == 0) {
int count = count_hash(tdb, int count = count_hash(tdb,
off + sizeof(p->u), off + sizeof(p->u),
...@@ -91,9 +92,11 @@ static bool summarize(struct tdb_context *tdb, ...@@ -91,9 +92,11 @@ static bool summarize(struct tdb_context *tdb,
if (count == -1) if (count == -1)
return false; return false;
tally_add(hashes, count); tally_add(hashes, count);
} else { } else if (rec_hash(&p->u) == 1) {
tally_add(flists, tally_add(flists,
rec_data_length(&p->u)); rec_data_length(&p->u));
} else if (rec_hash(&p->u) == 2) {
tally_add(chains, 1);
} }
} else { } else {
tally_add(keys, rec_key_length(&p->u)); tally_add(keys, rec_key_length(&p->u));
...@@ -121,6 +124,7 @@ static bool summarize(struct tdb_context *tdb, ...@@ -121,6 +124,7 @@ static bool summarize(struct tdb_context *tdb,
"Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \ "Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \
"Number of free lists: %zu\n%s" \ "Number of free lists: %zu\n%s" \
"Toplevel hash used: %u of %u\n" \ "Toplevel hash used: %u of %u\n" \
"Number of chains: %zu\n" \
"Number of subhashes: %zu\n" \ "Number of subhashes: %zu\n" \
"Smallest/average/largest subhash entries: %zu/%zu/%zu\n%s" \ "Smallest/average/largest subhash entries: %zu/%zu/%zu\n%s" \
"Percentage keys/data/padding/free/rechdrs/freehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n" "Percentage keys/data/padding/free/rechdrs/freehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n"
...@@ -139,7 +143,7 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags) ...@@ -139,7 +143,7 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags)
{ {
tdb_len_t len; tdb_len_t len;
struct tally *flists, *hashes, *freet, *keys, *data, *extra, *uncoal, struct tally *flists, *hashes, *freet, *keys, *data, *extra, *uncoal,
*buckets; *buckets, *chains;
char *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg, *bucketsg; char *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg, *bucketsg;
char *ret = NULL; char *ret = NULL;
...@@ -162,15 +166,16 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags) ...@@ -162,15 +166,16 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags)
extra = tally_new(HISTO_HEIGHT); extra = tally_new(HISTO_HEIGHT);
uncoal = tally_new(HISTO_HEIGHT); uncoal = tally_new(HISTO_HEIGHT);
buckets = tally_new(HISTO_HEIGHT); buckets = tally_new(HISTO_HEIGHT);
chains = tally_new(HISTO_HEIGHT);
if (!flists || !hashes || !freet || !keys || !data || !extra if (!flists || !hashes || !freet || !keys || !data || !extra
|| !uncoal || !buckets) { || !uncoal || !buckets || !chains) {
tdb_logerr(tdb, TDB_ERR_OOM, TDB_DEBUG_ERROR, tdb_logerr(tdb, TDB_ERR_OOM, TDB_DEBUG_ERROR,
"tdb_summary: failed to allocate tally structures"); "tdb_summary: failed to allocate tally structures");
goto unlock; goto unlock;
} }
if (!summarize(tdb, hashes, flists, freet, keys, data, extra, uncoal, if (!summarize(tdb, hashes, flists, freet, keys, data, extra, uncoal,
buckets)) buckets, chains))
goto unlock; goto unlock;
if (flags & TDB_SUMMARY_HISTOGRAMS) { if (flags & TDB_SUMMARY_HISTOGRAMS) {
...@@ -218,6 +223,7 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags) ...@@ -218,6 +223,7 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags)
count_hash(tdb, offsetof(struct tdb_header, hashtable), count_hash(tdb, offsetof(struct tdb_header, hashtable),
TDB_TOPLEVEL_HASH_BITS), TDB_TOPLEVEL_HASH_BITS),
1 << TDB_TOPLEVEL_HASH_BITS, 1 << TDB_TOPLEVEL_HASH_BITS,
tally_num(chains),
tally_num(hashes), tally_num(hashes),
tally_min(hashes), tally_mean(hashes), tally_max(hashes), tally_min(hashes), tally_mean(hashes), tally_max(hashes),
hashesg ? hashesg : "", hashesg ? hashesg : "",
...@@ -231,7 +237,8 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags) ...@@ -231,7 +237,8 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags)
* 100.0 / tdb->map_size, * 100.0 / tdb->map_size,
(tally_num(hashes) (tally_num(hashes)
* (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) * (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
+ (sizeof(tdb_off_t) << TDB_TOPLEVEL_HASH_BITS)) + (sizeof(tdb_off_t) << TDB_TOPLEVEL_HASH_BITS)
+ sizeof(struct tdb_chain) * tally_num(chains))
* 100.0 / tdb->map_size); * 100.0 / tdb->map_size);
unlock: unlock:
...@@ -249,6 +256,7 @@ unlock: ...@@ -249,6 +256,7 @@ unlock:
free(data); free(data);
free(extra); free(extra);
free(uncoal); free(uncoal);
free(chains);
tdb_allrecord_unlock(tdb, F_RDLCK); tdb_allrecord_unlock(tdb, F_RDLCK);
tdb_unlock_expand(tdb, F_RDLCK); tdb_unlock_expand(tdb, F_RDLCK);
......
...@@ -118,6 +118,7 @@ struct tdb_attribute_stats { ...@@ -118,6 +118,7 @@ struct tdb_attribute_stats {
size_t size; /* = sizeof(struct tdb_attribute_stats) */ size_t size; /* = sizeof(struct tdb_attribute_stats) */
uint64_t allocs; uint64_t allocs;
uint64_t alloc_subhash; uint64_t alloc_subhash;
uint64_t alloc_chain;
uint64_t alloc_bucket_exact; uint64_t alloc_bucket_exact;
uint64_t alloc_bucket_max; uint64_t alloc_bucket_max;
uint64_t alloc_leftover; uint64_t alloc_leftover;
......
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
static uint64_t badhash(const void *key, size_t len, uint64_t seed, void *priv)
{
return 0;
}
static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *p)
{
if (p)
return tdb_delete(tdb, key);
return 0;
}
int main(int argc, char *argv[])
{
unsigned int i, j;
struct tdb_context *tdb;
struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
struct tdb_data dbuf = { (unsigned char *)&j, sizeof(j) };
union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.hash_fn = badhash } };
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT,
};
hattr.base.next = &tap_log_attr;
plan_tests(5395);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
struct tdb_data d;
tdb = tdb_open("run-25-hashoverload.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
ok1(tdb);
if (!tdb)
continue;
/* Fill a group. */
for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
}
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Now store one last value: should form chain. */
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Check we can find them all. */
for (j = 0; j < (1 << TDB_HASH_GROUP_BITS) + 1; j++) {
d = tdb_fetch(tdb, key);
ok1(d.dsize == sizeof(j));
ok1(d.dptr != NULL);
ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
}
/* Now add a *lot* more. */
for (j = (1 << TDB_HASH_GROUP_BITS) + 1;
j < (16 << TDB_HASH_GROUP_BITS);
j++) {
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
d = tdb_fetch(tdb, key);
ok1(d.dsize == sizeof(j));
ok1(d.dptr != NULL);
ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
}
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Traverse through them. */
ok1(tdb_traverse(tdb, trav, NULL) == j);
/* Empty the first chain-worth. */
for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++)
ok1(tdb_delete(tdb, key) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
for (j = (1 << TDB_HASH_GROUP_BITS);
j < (16 << TDB_HASH_GROUP_BITS);
j++) {
d = tdb_fetch(tdb, key);
ok1(d.dsize == sizeof(j));
ok1(d.dptr != NULL);
ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
}
/* Traverse through them. */
ok1(tdb_traverse(tdb, trav, NULL)
== (15 << TDB_HASH_GROUP_BITS));
/* Re-add */
for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
}
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Now try deleting as we go. */
ok1(tdb_traverse(tdb, trav, trav)
== (16 << TDB_HASH_GROUP_BITS));
ok1(tdb_check(tdb, NULL, NULL) == 0);
ok1(tdb_traverse(tdb, trav, NULL) == 0);
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}
...@@ -56,7 +56,6 @@ static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *p) ...@@ -56,7 +56,6 @@ static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *p)
td->high = val; td->high = val;
if (td->delete) { if (td->delete) {
if (tdb_delete(tdb, key) != 0) { if (tdb_delete(tdb, key) != 0) {
td->delete_error = tdb_error(tdb); td->delete_error = tdb_error(tdb);
return -1; return -1;
......
...@@ -49,6 +49,8 @@ static void dump_and_clear_stats(struct tdb_attribute_stats *stats) ...@@ -49,6 +49,8 @@ static void dump_and_clear_stats(struct tdb_attribute_stats *stats)
(unsigned long long)stats->allocs); (unsigned long long)stats->allocs);
printf(" alloc_subhash = %llu\n", printf(" alloc_subhash = %llu\n",
(unsigned long long)stats->alloc_subhash); (unsigned long long)stats->alloc_subhash);
printf(" alloc_chain = %llu\n",
(unsigned long long)stats->alloc_chain);
printf(" alloc_bucket_exact = %llu\n", printf(" alloc_bucket_exact = %llu\n",
(unsigned long long)stats->alloc_bucket_exact); (unsigned long long)stats->alloc_bucket_exact);
printf(" alloc_bucket_max = %llu\n", printf(" alloc_bucket_max = %llu\n",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment