Commit 024a5647 authored by Rusty Russell

tdb2: use counters to decide when to coalesce records.

This simply uses a 7-bit counter which gets incremented on each addition
to the list (but not decremented on removals).  When it wraps, we walk the
entire list looking for things to coalesce.

This causes performance problems, especially when appending records, so
we limit it in the next patch.  Benchmark results before and after this
change:

Before:
$ time ./growtdb-bench 250000 10 > /dev/null && ls -l /tmp/growtdb.tdb && time ./tdbtorture -s 0 && ls -l torture.tdb && ./speed --transaction 2000000
real	0m59.687s
user	0m11.593s
sys	0m4.100s
-rw------- 1 rusty rusty 752004064 2011-04-27 21:14 /tmp/growtdb.tdb
testing with 3 processes, 5000 loops, seed=0
OK

real	1m17.738s
user	0m0.348s
sys	0m0.580s
-rw------- 1 rusty rusty 663360 2011-04-27 21:15 torture.tdb
Adding 2000000 records:  926 ns (110556088 bytes)
Finding 2000000 records:  592 ns (110556088 bytes)
Missing 2000000 records:  416 ns (110556088 bytes)
Traversing 2000000 records:  422 ns (110556088 bytes)
Deleting 2000000 records:  741 ns (244003768 bytes)
Re-adding 2000000 records:  799 ns (244003768 bytes)
Appending 2000000 records:  1147 ns (295244592 bytes)
Churning 2000000 records:  1827 ns (568411440 bytes)

After:
$ time ./growtdb-bench 250000 10 > /dev/null && ls -l /tmp/growtdb.tdb && time ./tdbtorture -s 0 && ls -l torture.tdb && ./speed --transaction 2000000
real	1m17.022s
user	0m27.206s
sys	0m3.920s
-rw------- 1 rusty rusty 570130576 2011-04-27 21:17 /tmp/growtdb.tdb
testing with 3 processes, 5000 loops, seed=0
OK

real	1m27.355s
user	0m0.296s
sys	0m0.516s
-rw------- 1 rusty rusty 617352 2011-04-27 21:18 torture.tdb
Adding 2000000 records:  890 ns (110556088 bytes)
Finding 2000000 records:  565 ns (110556088 bytes)
Missing 2000000 records:  390 ns (110556088 bytes)
Traversing 2000000 records:  410 ns (110556088 bytes)
Deleting 2000000 records:  8623 ns (244003768 bytes)
Re-adding 2000000 records:  7089 ns (244003768 bytes)
Appending 2000000 records:  33708 ns (244003768 bytes)
Churning 2000000 records:  2029 ns (268404160 bytes)
parent a8b30ad4
......@@ -533,11 +533,13 @@ static enum TDB_ERROR check_free_table(struct tdb_context *tdb,
h = bucket_off(ftable_off, i);
for (off = tdb_read_off(tdb, h); off; off = f.next) {
if (!first)
first = off;
if (TDB_OFF_IS_ERR(off)) {
return off;
}
if (!first) {
off &= TDB_OFF_MASK;
first = off;
}
ecode = tdb_read_convert(tdb, off, &f, sizeof(f));
if (ecode != TDB_SUCCESS) {
return ecode;
......
This diff is collapsed.
......@@ -466,7 +466,8 @@ tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen,
/* Put this record in a free list. */
enum TDB_ERROR add_free_record(struct tdb_context *tdb,
tdb_off_t off, tdb_len_t len_with_header,
enum tdb_lock_flags waitflag);
enum tdb_lock_flags waitflag,
bool coalesce_ok);
/* Set up header for a used/ftable/htable/chain record. */
enum TDB_ERROR set_header(struct tdb_context *tdb,
......
......@@ -42,7 +42,7 @@ static enum TDB_ERROR replace_data(struct tdb_context *tdb,
ecode = add_free_record(tdb, old_off,
sizeof(struct tdb_used_record)
+ key.dsize + old_room,
TDB_LOCK_WAIT);
TDB_LOCK_WAIT, true);
if (ecode == TDB_SUCCESS)
ecode = replace_in_hash(tdb, h, new_off);
} else {
......@@ -292,7 +292,7 @@ enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key)
+ rec_key_length(&rec)
+ rec_data_length(&rec)
+ rec_extra_padding(&rec),
TDB_LOCK_WAIT);
TDB_LOCK_WAIT, true);
if (tdb->flags & TDB_SEQNUM)
tdb_inc_seqnum(tdb);
......
......@@ -150,7 +150,7 @@ static void add_to_freetable(struct tdb_context *tdb,
tdb->ftable_off = freetable->base.off;
tdb->ftable = ftable;
add_free_record(tdb, eoff, sizeof(struct tdb_used_record) + elen,
TDB_LOCK_WAIT);
TDB_LOCK_WAIT, false);
}
static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup)
......
......@@ -52,7 +52,7 @@ int main(int argc, char *argv[])
/* Lock and fail to coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
test = layout->elem[1].base.off;
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, len, &test, &test)
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, len, &test)
== 0);
tdb_unlock_free_bucket(tdb, b_off);
ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
......@@ -75,7 +75,7 @@ int main(int argc, char *argv[])
/* Lock and fail to coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
test = layout->elem[1].base.off;
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test, &test)
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
== 0);
tdb_unlock_free_bucket(tdb, b_off);
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
......@@ -99,7 +99,7 @@ int main(int argc, char *argv[])
/* Lock and coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
test = layout->elem[2].base.off;
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test, &test)
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
== 1024 + sizeof(struct tdb_used_record) + 2048);
/* Should tell us it's erased this one... */
ok1(test == TDB_ERR_NOEXIST);
......@@ -126,7 +126,7 @@ int main(int argc, char *argv[])
/* Lock and coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
test = layout->elem[2].base.off;
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test, &test)
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
== 1024 + sizeof(struct tdb_used_record) + 512);
ok1(tdb->file->allrecord_lock.count == 0 && tdb->file->num_lockrecs == 0);
ok1(free_record_length(tdb, layout->elem[1].base.off)
......@@ -153,7 +153,7 @@ int main(int argc, char *argv[])
/* Lock and coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
test = layout->elem[2].base.off;
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test, &test)
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
== 1024 + sizeof(struct tdb_used_record) + 512
+ sizeof(struct tdb_used_record) + 256);
ok1(tdb->file->allrecord_lock.count == 0
......
......@@ -177,7 +177,7 @@ int main(int argc, char *argv[])
+ rec_key_length(&rec)
+ rec_data_length(&rec)
+ rec_extra_padding(&rec),
TDB_LOCK_NOWAIT) == 0);
TDB_LOCK_NOWAIT, false) == 0);
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_WRLCK) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
......
......@@ -43,11 +43,11 @@ int main(int argc, char *argv[])
/* This makes a sparse file */
ok1(ftruncate(tdb->file->fd, 0xFFFFFFF0) == 0);
ok1(add_free_record(tdb, old_size, 0xFFFFFFF0 - old_size,
TDB_LOCK_WAIT) == TDB_SUCCESS);
TDB_LOCK_WAIT, false) == TDB_SUCCESS);
/* Now add a little record past the 4G barrier. */
ok1(tdb_expand_file(tdb, 100) == TDB_SUCCESS);
ok1(add_free_record(tdb, 0xFFFFFFF0, 100, TDB_LOCK_WAIT)
ok1(add_free_record(tdb, 0xFFFFFFF0, 100, TDB_LOCK_WAIT, false)
== TDB_SUCCESS);
ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
......
......@@ -689,7 +689,7 @@ static enum TDB_ERROR tdb_recovery_allocate(struct tdb_context *tdb,
tdb->stats.frees++;
ecode = add_free_record(tdb, recovery_head,
sizeof(rec) + rec.max_len,
TDB_LOCK_WAIT);
TDB_LOCK_WAIT, true);
if (ecode != TDB_SUCCESS) {
return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
"tdb_recovery_allocate:"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment