Commit 84a19b9b authored by Rusty Russell

tdb2: expand lock now nests inside other locks.

This eliminates all the "drop lock and retry" logic: we expand inside alloc().
parent 1a24a870
...@@ -533,53 +533,6 @@ int set_header(struct tdb_context *tdb, ...@@ -533,53 +533,6 @@ int set_header(struct tdb_context *tdb,
return 0; return 0;
} }
/* Work out how many bytes to reserve for a record's key and data.
 *
 * The combined length is raised to MIN_DATA_LEN if it is smaller, gets an
 * extra half of datalen when `growing` is set, and is finally rounded up to
 * a multiple of sizeof(uint64_t). */
static tdb_len_t adjust_size(size_t keylen, size_t datalen, bool growing)
{
	const unsigned long long align_mask = sizeof(uint64_t) - 1ULL;
	tdb_len_t total = keylen + datalen;

	if (total < MIN_DATA_LEN)
		total = MIN_DATA_LEN;

	/* Enlarging stores are overallocated by half the data length. */
	total += growing ? datalen / 2 : 0;

	/* Round up to the next uint64_t boundary. */
	return (total + align_mask) & ~align_mask;
}
/* If this fails, try tdb_expand. */
tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen,
uint64_t hash, bool growing)
{
tdb_off_t off;
tdb_len_t size, actual;
struct tdb_used_record rec;
/* We don't want header to change during this! */
assert(tdb->header_uptodate);
size = adjust_size(keylen, datalen, growing);
off = get_free(tdb, size, &actual);
if (unlikely(off == TDB_OFF_ERR || off == 0))
return off;
/* Some supergiant values can't be encoded. */
/* FIXME: Check before, and limit actual in get_free. */
if (set_header(tdb, &rec, keylen, datalen, actual, hash,
tdb->zhdr.zone_bits) != 0) {
add_free_record(tdb, tdb->zhdr.zone_bits, off,
sizeof(rec) + actual);
return TDB_OFF_ERR;
}
if (tdb_write_convert(tdb, off, &rec, sizeof(rec)) != 0)
return TDB_OFF_ERR;
return off;
}
static bool zones_happy(struct tdb_context *tdb) static bool zones_happy(struct tdb_context *tdb)
{ {
/* FIXME: look at distribution of zones. */ /* FIXME: look at distribution of zones. */
...@@ -594,8 +547,7 @@ static tdb_len_t overhead(unsigned int zone_bits) ...@@ -594,8 +547,7 @@ static tdb_len_t overhead(unsigned int zone_bits)
} }
/* Expand the database (by adding a zone). */ /* Expand the database (by adding a zone). */
int tdb_expand(struct tdb_context *tdb, tdb_len_t klen, tdb_len_t dlen, static int tdb_expand(struct tdb_context *tdb, tdb_len_t size)
bool growing)
{ {
uint64_t old_size; uint64_t old_size;
tdb_off_t off; tdb_off_t off;
...@@ -606,8 +558,7 @@ int tdb_expand(struct tdb_context *tdb, tdb_len_t klen, tdb_len_t dlen, ...@@ -606,8 +558,7 @@ int tdb_expand(struct tdb_context *tdb, tdb_len_t klen, tdb_len_t dlen,
bool enlarge_zone; bool enlarge_zone;
/* We need room for the record header too. */ /* We need room for the record header too. */
wanted = sizeof(struct tdb_used_record) wanted = sizeof(struct tdb_used_record) + size;
+ (adjust_size(klen, dlen, growing)<<TDB_COMFORT_FACTOR_BITS);
/* Only one person can expand file at a time. */ /* Only one person can expand file at a time. */
if (tdb_lock_expand(tdb, F_WRLCK) != 0) if (tdb_lock_expand(tdb, F_WRLCK) != 0)
...@@ -667,6 +618,10 @@ int tdb_expand(struct tdb_context *tdb, tdb_len_t klen, tdb_len_t dlen, ...@@ -667,6 +618,10 @@ int tdb_expand(struct tdb_context *tdb, tdb_len_t klen, tdb_len_t dlen,
if (add_free_record(tdb, zone_bits, off, tdb->map_size-1-off) == -1) if (add_free_record(tdb, zone_bits, off, tdb->map_size-1-off) == -1)
goto fail; goto fail;
/* Try allocating from this zone now. */
tdb->zone_off = old_size - 1;
tdb->zhdr = zhdr;
success: success:
tdb_unlock_expand(tdb, F_WRLCK); tdb_unlock_expand(tdb, F_WRLCK);
return 0; return 0;
...@@ -675,3 +630,57 @@ fail: ...@@ -675,3 +630,57 @@ fail:
tdb_unlock_expand(tdb, F_WRLCK); tdb_unlock_expand(tdb, F_WRLCK);
return -1; return -1;
} }
/* Work out how many bytes to reserve for a record's key and data.
 *
 * The combined length is raised to MIN_DATA_LEN if it is smaller, gets an
 * extra half of datalen when `growing` is set, and is finally rounded up to
 * a multiple of sizeof(uint64_t). */
static tdb_len_t adjust_size(size_t keylen, size_t datalen, bool growing)
{
	const unsigned long long align_mask = sizeof(uint64_t) - 1ULL;
	tdb_len_t total = keylen + datalen;

	if (total < MIN_DATA_LEN)
		total = MIN_DATA_LEN;

	/* Enlarging stores are overallocated by half the data length. */
	total += growing ? datalen / 2 : 0;

	/* Round up to the next uint64_t boundary. */
	return (total + align_mask) & ~align_mask;
}
/* This won't fail: it will expand the database if it has to. */
tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen,
uint64_t hash, bool growing)
{
tdb_off_t off;
tdb_len_t size, actual;
struct tdb_used_record rec;
/* We don't want header to change during this! */
assert(tdb->header_uptodate);
size = adjust_size(keylen, datalen, growing);
again:
off = get_free(tdb, size, &actual);
if (unlikely(off == TDB_OFF_ERR))
return off;
if (unlikely(off == 0)) {
if (tdb_expand(tdb, size) == -1)
return TDB_OFF_ERR;
goto again;
}
/* Some supergiant values can't be encoded. */
/* FIXME: Check before, and limit actual in get_free. */
if (set_header(tdb, &rec, keylen, datalen, actual, hash,
tdb->zhdr.zone_bits) != 0) {
add_free_record(tdb, tdb->zhdr.zone_bits, off,
sizeof(rec) + actual);
return TDB_OFF_ERR;
}
if (tdb_write_convert(tdb, off, &rec, sizeof(rec)) != 0)
return TDB_OFF_ERR;
return off;
}
...@@ -405,9 +405,6 @@ void tdb_unlock_open(struct tdb_context *tdb); ...@@ -405,9 +405,6 @@ void tdb_unlock_open(struct tdb_context *tdb);
int tdb_lock_expand(struct tdb_context *tdb, int ltype); int tdb_lock_expand(struct tdb_context *tdb, int ltype);
void tdb_unlock_expand(struct tdb_context *tdb, int ltype); void tdb_unlock_expand(struct tdb_context *tdb, int ltype);
/* Expand the file. */
int tdb_expand(struct tdb_context *tdb, tdb_len_t klen, tdb_len_t dlen,
bool growing);
#if 0 #if 0
/* Low-level locking primitives. */ /* Low-level locking primitives. */
......
...@@ -624,17 +624,11 @@ static void enlarge_hash(struct tdb_context *tdb) ...@@ -624,17 +624,11 @@ static void enlarge_hash(struct tdb_context *tdb)
if ((1ULL << tdb->header.v.hash_bits) != num) if ((1ULL << tdb->header.v.hash_bits) != num)
goto unlock; goto unlock;
again:
/* Allocate our new array. */ /* Allocate our new array. */
hlen = num * sizeof(tdb_off_t) * 2; hlen = num * sizeof(tdb_off_t) * 2;
newoff = alloc(tdb, 0, hlen, 0, false); newoff = alloc(tdb, 0, hlen, 0, false);
if (unlikely(newoff == TDB_OFF_ERR)) if (unlikely(newoff == TDB_OFF_ERR))
goto unlock; goto unlock;
if (unlikely(newoff == 0)) {
if (tdb_expand(tdb, 0, hlen, false) == -1)
goto unlock;
goto again;
}
/* Step over record header! */ /* Step over record header! */
newoff += sizeof(struct tdb_used_record); newoff += sizeof(struct tdb_used_record);
...@@ -755,7 +749,7 @@ static tdb_off_t find_and_lock(struct tdb_context *tdb, ...@@ -755,7 +749,7 @@ static tdb_off_t find_and_lock(struct tdb_context *tdb,
bucket, rec); bucket, rec);
} }
/* Returns -1 on error, 0 on OK, 1 on "expand and retry." */ /* Returns -1 on error, 0 on OK. */
static int replace_data(struct tdb_context *tdb, static int replace_data(struct tdb_context *tdb,
uint64_t h, struct tdb_data key, struct tdb_data dbuf, uint64_t h, struct tdb_data key, struct tdb_data dbuf,
tdb_off_t bucket, tdb_off_t bucket,
...@@ -770,9 +764,6 @@ static int replace_data(struct tdb_context *tdb, ...@@ -770,9 +764,6 @@ static int replace_data(struct tdb_context *tdb,
if (unlikely(new_off == TDB_OFF_ERR)) if (unlikely(new_off == TDB_OFF_ERR))
return -1; return -1;
if (unlikely(new_off == 0))
return 1;
/* We didn't like the existing one: remove it. */ /* We didn't like the existing one: remove it. */
if (old_off) if (old_off)
add_free_record(tdb, old_zone, old_off, add_free_record(tdb, old_zone, old_off,
...@@ -849,13 +840,6 @@ int tdb_store(struct tdb_context *tdb, ...@@ -849,13 +840,6 @@ int tdb_store(struct tdb_context *tdb,
rec_zone_bits(&rec), off != 0); rec_zone_bits(&rec), off != 0);
unlock_lists(tdb, start, num, F_WRLCK); unlock_lists(tdb, start, num, F_WRLCK);
if (unlikely(ret == 1)) {
/* Expand, then try again... */
if (tdb_expand(tdb, key.dsize, dbuf.dsize, off != 0) == -1)
return -1;
return tdb_store(tdb, key, dbuf, flag);
}
/* FIXME: by simple simulation, this approximated 60% full. /* FIXME: by simple simulation, this approximated 60% full.
* Check in real case! */ * Check in real case! */
if (unlikely(num > 4 * tdb->header.v.hash_bits - 30)) if (unlikely(num > 4 * tdb->header.v.hash_bits - 30))
...@@ -933,13 +917,6 @@ int tdb_append(struct tdb_context *tdb, ...@@ -933,13 +917,6 @@ int tdb_append(struct tdb_context *tdb,
unlock_lists(tdb, start, num, F_WRLCK); unlock_lists(tdb, start, num, F_WRLCK);
free(newdata); free(newdata);
if (unlikely(ret == 1)) {
/* Expand, then try again. */
if (tdb_expand(tdb, key.dsize, dbuf.dsize, true) == -1)
return -1;
return tdb_append(tdb, key, dbuf);
}
/* FIXME: by simple simulation, this approximated 60% full. /* FIXME: by simple simulation, this approximated 60% full.
* Check in real case! */ * Check in real case! */
if (unlikely(num > 4 * tdb->header.v.hash_bits - 30)) if (unlikely(num > 4 * tdb->header.v.hash_bits - 30))
......
...@@ -26,33 +26,33 @@ int main(int argc, char *argv[]) ...@@ -26,33 +26,33 @@ int main(int argc, char *argv[])
/* First expand. Should add a zone, doubling file size.. */ /* First expand. Should add a zone, doubling file size.. */
val = tdb->map_size - 1 - sizeof(struct tdb_header); val = tdb->map_size - 1 - sizeof(struct tdb_header);
ok1(tdb_expand(tdb, 1, 1, false) == 0); ok1(tdb_expand(tdb, 1) == 0);
ok1(tdb->map_size == 2 * val + 1 + sizeof(struct tdb_header)); ok1(tdb->map_size == 2 * val + 1 + sizeof(struct tdb_header));
ok1(tdb_check(tdb, NULL, NULL) == 0); ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Second expand, add another zone of same size. */ /* Second expand, add another zone of same size. */
ok1(tdb_expand(tdb, 1, 1, false) == 0); ok1(tdb_expand(tdb, 1) == 0);
ok1(tdb->map_size == 3 * val + 1 + sizeof(struct tdb_header)); ok1(tdb->map_size == 3 * val + 1 + sizeof(struct tdb_header));
ok1(tdb_check(tdb, NULL, NULL) == 0); ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Large expand, but can only add 4th zone of same size. */ /* Large expand, but can only add 4th zone of same size. */
ok1(tdb_expand(tdb, 0, 4*val, false) == 0); ok1(tdb_expand(tdb, 4*val) == 0);
ok1(tdb->map_size == 4 * val + 1 + sizeof(struct tdb_header)); ok1(tdb->map_size == 4 * val + 1 + sizeof(struct tdb_header));
ok1(tdb_check(tdb, NULL, NULL) == 0); ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Large expand now will double file. */ /* Large expand now will double file. */
ok1(tdb_expand(tdb, 0, 4*val, false) == 0); ok1(tdb_expand(tdb, 4*val) == 0);
ok1(tdb->map_size == 8 * val + 1 + sizeof(struct tdb_header)); ok1(tdb->map_size == 8 * val + 1 + sizeof(struct tdb_header));
ok1(tdb_check(tdb, NULL, NULL) == 0); ok1(tdb_check(tdb, NULL, NULL) == 0);
/* And again? */ /* And again? */
ok1(tdb_expand(tdb, 0, 4*val, false) == 0); ok1(tdb_expand(tdb, 4*val) == 0);
ok1(tdb->map_size == 16 * val + 1 + sizeof(struct tdb_header)); ok1(tdb->map_size == 16 * val + 1 + sizeof(struct tdb_header));
ok1(tdb_check(tdb, NULL, NULL) == 0); ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Below comfort level, will add a single 8*val zone. */ /* Below comfort level, will add a single 8*val zone. */
ok1(tdb_expand(tdb, 0, ((8*val) >> TDB_COMFORT_FACTOR_BITS) ok1(tdb_expand(tdb, ((8*val) >> TDB_COMFORT_FACTOR_BITS)
- sizeof(struct tdb_used_record), false) == 0); - sizeof(struct tdb_used_record)) == 0);
ok1(tdb->map_size == 24 * val + 1 + sizeof(struct tdb_header)); ok1(tdb->map_size == 24 * val + 1 + sizeof(struct tdb_header));
tdb_close(tdb); tdb_close(tdb);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment