Commit 46d8f620 authored by Rusty Russell's avatar Rusty Russell

tdb2: tdb_summary() implementation for tdb2.

parent 8a462e5a
......@@ -78,6 +78,7 @@ int main(int argc, char *argv[])
printf("ccan/likely\n");
printf("ccan/asearch\n");
printf("ccan/build_assert\n");
printf("ccan/tally\n");
return 0;
}
......
/*
Trivial Database 2: human-readable summary code
Copyright (C) Rusty Russell 2010
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "private.h"
#include <assert.h>
#include <ccan/tally/tally.h>
static void sizes_for_bucket(unsigned bucket, size_t *min, size_t *max)
{
if (bucket <= 8) {
*min = *max = TDB_MIN_DATA_LEN + bucket * 8;
} else if (bucket == 9) {
/* FIXME: This is twisted; fix size_to_bucket. */
*min = TDB_MIN_DATA_LEN + (1ULL << (bucket - 3)) + 8;
*max = TDB_MIN_DATA_LEN + (1ULL << (bucket - 2)) - 8;
} else {
*min = TDB_MIN_DATA_LEN + (1ULL << (bucket - 3));
*max = TDB_MIN_DATA_LEN + (1ULL << (bucket - 2)) - 8;
}
assert(size_to_bucket(63, *min) == bucket);
assert(size_to_bucket(63, *max) == bucket);
if (bucket > 8)
assert(size_to_bucket(63, *min - 8) == bucket - 1);
assert(size_to_bucket(63, *max + 8) == bucket + 1);
}
static int count_hash(struct tdb_context *tdb,
tdb_off_t hash_off, unsigned bits)
{
const tdb_off_t *h;
unsigned int i, count = 0;
h = tdb_access_read(tdb, hash_off, sizeof(*h) << bits, true);
if (!h)
return -1;
for (i = 0; i < (1 << bits); i++)
count += (h[i] != 0);
tdb_access_release(tdb, h);
return count;
}
static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off,
struct tally *zones,
struct tally *hashes,
struct tally *free,
struct tally *keys,
struct tally *data,
struct tally *extra,
struct tally *uncoal,
uint64_t bucketlen[],
unsigned int *num_buckets)
{
struct free_zone_header zhdr;
tdb_off_t off;
tdb_len_t len;
unsigned int hdrlen;
tdb_len_t unc = 0;
if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1)
return TDB_OFF_ERR;
tally_add(zones, 1ULL << zhdr.zone_bits);
*num_buckets = BUCKETS_FOR_ZONE(zhdr.zone_bits);
hdrlen = sizeof(zhdr)
+ (BUCKETS_FOR_ZONE(zhdr.zone_bits) + 1) * sizeof(tdb_off_t);
for (off = zone_off + hdrlen;
off < zone_off + (1ULL << zhdr.zone_bits);
off += len) {
union {
struct tdb_used_record u;
struct tdb_free_record f;
} pad, *p;
p = tdb_get(tdb, off, &pad, sizeof(pad));
if (!p)
return TDB_OFF_ERR;
if (frec_magic(&p->f) == TDB_FREE_MAGIC) {
len = p->f.data_len;
tally_add(free, len);
bucketlen[size_to_bucket(frec_zone_bits(&p->f), len)]++;
len += sizeof(p->u);
unc++;
} else {
if (unc) {
tally_add(uncoal, unc);
unc = 0;
}
len = sizeof(p->u)
+ rec_key_length(&p->u)
+ rec_data_length(&p->u)
+ rec_extra_padding(&p->u);
/* FIXME: Use different magic for hashes? */
if (!rec_key_length(&p->u) && !rec_hash(&p->u)) {
int count = count_hash(tdb, off + sizeof(p->u),
TDB_SUBLEVEL_HASH_BITS);
if (count == -1)
return TDB_OFF_ERR;
tally_add(hashes, count);
} else {
tally_add(keys, rec_key_length(&p->u));
tally_add(data, rec_data_length(&p->u));
}
tally_add(extra, rec_extra_padding(&p->u));
}
}
if (unc)
tally_add(uncoal, unc);
return 1ULL << zhdr.zone_bits;
}
#define SUMMARY_FORMAT \
"Size of file/data: %zu/%zu\n" \
"Number of zones: %zu\n" \
"Smallest/average/largest zone size: %zu/%zu/%zu\n%s" \
"Number of records: %zu\n" \
"Smallest/average/largest keys: %zu/%zu/%zu\n%s" \
"Smallest/average/largest data: %zu/%zu/%zu\n%s" \
"Smallest/average/largest padding: %zu/%zu/%zu\n%s" \
"Number of free records: %zu\n" \
"Smallest/average/largest free records: %zu/%zu/%zu\n%s" \
"Number of uncoalesced records: %zu\n" \
"Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \
"Toplevel hash used: %u of %u\n" \
"Number of subhashes: %zu\n" \
"Smallest/average/largest subhash entries: %zu/%zu/%zu\n%s" \
"Percentage keys/data/padding/free/rechdrs/zonehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n"
#define BUCKET_SUMMARY_FORMAT_A \
"Free bucket %zu: total entries %zu.\n" \
"Smallest/average/largest length: %zu/%zu/%zu\n%s"
#define BUCKET_SUMMARY_FORMAT_B \
"Free bucket %zu-%zu: total entries %zu.\n" \
"Smallest/average/largest length: %zu/%zu/%zu\n%s"
#define HISTO_WIDTH 70
#define HISTO_HEIGHT 20
char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags)
{
tdb_off_t off;
tdb_len_t len;
unsigned int i, num_buckets, max_bucket = 0;
uint64_t total_buckets = 0;
struct tally *zones, *hashes, *freet, *keys, *data, *extra, *uncoal,
*buckets[BUCKETS_FOR_ZONE(63)+1] = { NULL };
char *zonesg, *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg,
*bucketsg[BUCKETS_FOR_ZONE(63)+1] = { NULL };
char *ret = NULL;
if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0)
return NULL;
if (tdb_lock_expand(tdb, F_RDLCK) != 0) {
tdb_allrecord_unlock(tdb, F_RDLCK);
return NULL;
}
/* Start stats off empty. */
zones = tally_new(HISTO_HEIGHT);
hashes = tally_new(HISTO_HEIGHT);
freet = tally_new(HISTO_HEIGHT);
keys = tally_new(HISTO_HEIGHT);
data = tally_new(HISTO_HEIGHT);
extra = tally_new(HISTO_HEIGHT);
uncoal = tally_new(HISTO_HEIGHT);
if (!zones || !hashes || !freet || !keys || !data || !extra
|| !uncoal) {
tdb->ecode = TDB_ERR_OOM;
goto unlock;
}
for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
buckets[i] = tally_new(HISTO_HEIGHT);
if (!buckets[i]) {
tdb->ecode = TDB_ERR_OOM;
goto unlock;
}
}
for (off = sizeof(struct tdb_header);
off < tdb->map_size - 1;
off += len) {
uint64_t bucketlen[BUCKETS_FOR_ZONE(63)+1] = { 0 };
len = summarize_zone(tdb, off, zones, hashes, freet, keys,
data, extra, uncoal, bucketlen,
&num_buckets);
if (len == TDB_OFF_ERR)
goto unlock;
for (i = 0; i < num_buckets; i++)
tally_add(buckets[i], bucketlen[i]);
if (num_buckets > max_bucket)
max_bucket = num_buckets;
total_buckets += num_buckets;
}
if (flags & TDB_SUMMARY_HISTOGRAMS) {
zonesg = tally_histogram(zones, HISTO_WIDTH, HISTO_HEIGHT);
hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT);
freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT);
keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT);
datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT);
extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT);
uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT);
for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
bucketsg[i] = tally_histogram(buckets[i],
HISTO_WIDTH,
HISTO_HEIGHT);
}
} else {
zonesg = hashesg = freeg = keysg = datag = extrag = uncoalg
= NULL;
}
/* 20 is max length of a %llu. */
len = strlen(SUMMARY_FORMAT) + 33*20 + 1
+ (zonesg ? strlen(zonesg) : 0)
+ (hashesg ? strlen(hashesg) : 0)
+ (freeg ? strlen(freeg) : 0)
+ (keysg ? strlen(keysg) : 0)
+ (datag ? strlen(datag) : 0)
+ (extrag ? strlen(extrag) : 0)
+ (uncoalg ? strlen(uncoalg) : 0);
for (i = 0; i < max_bucket; i++) {
len += strlen(BUCKET_SUMMARY_FORMAT_B) + 6 * 20
+ (bucketsg[i] ? strlen(bucketsg[i]) : 0);
}
ret = malloc(len);
if (!ret)
goto unlock;
len = sprintf(ret, SUMMARY_FORMAT,
(size_t)tdb->map_size,
tally_num(keys) + tally_num(data),
tally_num(zones),
tally_min(zones), tally_mean(zones), tally_max(zones),
zonesg ? zonesg : "",
tally_num(keys),
tally_min(keys), tally_mean(keys), tally_max(keys),
keysg ? keysg : "",
tally_min(data), tally_mean(data), tally_max(data),
datag ? datag : "",
tally_min(extra), tally_mean(extra), tally_max(extra),
extrag ? extrag : "",
tally_num(freet),
tally_min(freet), tally_mean(freet), tally_max(freet),
freeg ? freeg : "",
tally_total(uncoal, NULL),
tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal),
uncoalg ? uncoalg : "",
count_hash(tdb, offsetof(struct tdb_header, hashtable),
TDB_TOPLEVEL_HASH_BITS),
1 << TDB_TOPLEVEL_HASH_BITS,
tally_num(hashes),
tally_min(hashes), tally_mean(hashes), tally_max(hashes),
hashesg ? hashesg : "",
tally_total(keys, NULL) * 100.0 / tdb->map_size,
tally_total(data, NULL) * 100.0 / tdb->map_size,
tally_total(extra, NULL) * 100.0 / tdb->map_size,
tally_total(freet, NULL) * 100.0 / tdb->map_size,
(tally_num(keys) + tally_num(freet) + tally_num(hashes))
* sizeof(struct tdb_used_record) * 100.0 / tdb->map_size,
(tally_num(zones) * sizeof(struct free_zone_header)
+ total_buckets * sizeof(tdb_off_t))
* 100.0 / tdb->map_size,
(tally_num(hashes)
* (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
+ (sizeof(tdb_off_t) << TDB_TOPLEVEL_HASH_BITS))
* 100.0 / tdb->map_size);
for (i = 0; i < max_bucket; i++) {
size_t min, max;
sizes_for_bucket(i, &min, &max);
if (min == max) {
len += sprintf(ret + len, BUCKET_SUMMARY_FORMAT_A,
min, tally_total(buckets[i], NULL),
tally_min(buckets[i]),
tally_mean(buckets[i]),
tally_max(buckets[i]),
bucketsg[i] ? bucketsg[i] : "");
} else {
len += sprintf(ret + len, BUCKET_SUMMARY_FORMAT_B,
min, max, tally_total(buckets[i], NULL),
tally_min(buckets[i]),
tally_mean(buckets[i]),
tally_max(buckets[i]),
bucketsg[i] ? bucketsg[i] : "");
}
}
unlock:
free(zonesg);
free(hashesg);
free(freeg);
free(keysg);
free(datag);
free(extrag);
free(uncoalg);
free(zones);
free(hashes);
free(freet);
free(keys);
free(data);
free(extra);
free(uncoal);
for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
free(buckets[i]);
free(bucketsg[i]);
}
tdb_allrecord_unlock(tdb, F_RDLCK);
tdb_unlock_expand(tdb, F_RDLCK);
return ret;
}
......@@ -63,6 +63,9 @@ enum TDB_ERROR {TDB_SUCCESS=0, TDB_ERR_CORRUPT, TDB_ERR_IO, TDB_ERR_LOCK,
TDB_ERR_OOM, TDB_ERR_EXISTS, TDB_ERR_NOEXIST,
TDB_ERR_EINVAL, TDB_ERR_RDONLY, TDB_ERR_NESTING };
/* flags for tdb_summary. Logical or to combine. */
enum tdb_summary_flags { TDB_SUMMARY_HISTOGRAMS = 1 };
/* debugging uses one of the following levels */
enum tdb_debug_level {TDB_DEBUG_FATAL = 0, TDB_DEBUG_ERROR,
TDB_DEBUG_WARNING, TDB_DEBUG_TRACE};
......@@ -143,6 +146,8 @@ int tdb_check(struct tdb_context *tdb,
enum TDB_ERROR tdb_error(struct tdb_context *tdb);
const char *tdb_errorstr(struct tdb_context *tdb);
char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags);
extern struct tdb_data tdb_null;
#ifdef __cplusplus
......
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/summary.c>
#include <ccan/tap/tap.h>
#include "logging.h"
int main(int argc, char *argv[])
{
unsigned int i, j;
struct tdb_context *tdb;
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
struct tdb_data data = { (unsigned char *)&j, sizeof(j) };
char *summary;
plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 7) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-summary.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
if (!tdb)
continue;
/* Put some stuff in there. */
for (j = 0; j < 500; j++)
if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
fail("Storing in tdb");
for (j = 0;
j <= TDB_SUMMARY_HISTOGRAMS;
j += TDB_SUMMARY_HISTOGRAMS) {
summary = tdb_summary(tdb, j);
ok1(strstr(summary, "Number of records: 500\n"));
ok1(strstr(summary, "Smallest/average/largest keys: 4/4/4\n"));
ok1(strstr(summary, "Smallest/average/largest data: 4/4/4\n"));
ok1(strstr(summary, "Free bucket 8"));
ok1(strstr(summary, "Free bucket 16"));
ok1(strstr(summary, "Free bucket 24"));
if (j == TDB_SUMMARY_HISTOGRAMS)
ok1(strstr(summary, "|")
&& strstr(summary, "*"));
else
ok1(!strstr(summary, "|")
&& !strstr(summary, "*"));
free(summary);
}
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment