Commit 3835bb95 authored by Rusty Russell

tdb2: rework free.c functions to return enum TDB_ERROR.

parent c02f63e6
...@@ -59,7 +59,7 @@ tdb_off_t next_ftable(struct tdb_context *tdb, tdb_off_t ftable) ...@@ -59,7 +59,7 @@ tdb_off_t next_ftable(struct tdb_context *tdb, tdb_off_t ftable)
return tdb_read_off(tdb, ftable + offsetof(struct tdb_freetable,next)); return tdb_read_off(tdb, ftable + offsetof(struct tdb_freetable,next));
} }
int tdb_ftable_init(struct tdb_context *tdb) enum TDB_ERROR tdb_ftable_init(struct tdb_context *tdb)
{ {
/* Use reservoir sampling algorithm to select a free list at random. */ /* Use reservoir sampling algorithm to select a free list at random. */
unsigned int rnd, max = 0, count = 0; unsigned int rnd, max = 0, count = 0;
...@@ -70,8 +70,7 @@ int tdb_ftable_init(struct tdb_context *tdb) ...@@ -70,8 +70,7 @@ int tdb_ftable_init(struct tdb_context *tdb)
while (off) { while (off) {
if (TDB_OFF_IS_ERR(off)) { if (TDB_OFF_IS_ERR(off)) {
tdb->ecode = off; return off;
return -1;
} }
rnd = random(); rnd = random();
...@@ -84,7 +83,7 @@ int tdb_ftable_init(struct tdb_context *tdb) ...@@ -84,7 +83,7 @@ int tdb_ftable_init(struct tdb_context *tdb)
off = next_ftable(tdb, off); off = next_ftable(tdb, off);
count++; count++;
} }
return 0; return TDB_SUCCESS;
} }
/* Offset of a given bucket. */ /* Offset of a given bucket. */
...@@ -105,7 +104,7 @@ static tdb_off_t find_free_head(struct tdb_context *tdb, ...@@ -105,7 +104,7 @@ static tdb_off_t find_free_head(struct tdb_context *tdb,
} }
/* Remove from free bucket. */ /* Remove from free bucket. */
static int remove_from_list(struct tdb_context *tdb, static enum TDB_ERROR remove_from_list(struct tdb_context *tdb,
tdb_off_t b_off, tdb_off_t r_off, tdb_off_t b_off, tdb_off_t r_off,
const struct tdb_free_record *r) const struct tdb_free_record *r)
{ {
...@@ -121,18 +120,17 @@ static int remove_from_list(struct tdb_context *tdb, ...@@ -121,18 +120,17 @@ static int remove_from_list(struct tdb_context *tdb,
#ifdef CCAN_TDB2_DEBUG #ifdef CCAN_TDB2_DEBUG
if (tdb_read_off(tdb, off) != r_off) { if (tdb_read_off(tdb, off) != r_off) {
tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
"remove_from_list: %llu bad prev in list %llu", "remove_from_list:"
" %llu bad prev in list %llu",
(long long)r_off, (long long)b_off); (long long)r_off, (long long)b_off);
return -1;
} }
#endif #endif
/* r->prev->next = r->next */ /* r->prev->next = r->next */
ecode = tdb_write_off(tdb, off, r->next); ecode = tdb_write_off(tdb, off, r->next);
if (ecode != TDB_SUCCESS) { if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode; return ecode;
return -1;
} }
if (r->next != 0) { if (r->next != 0) {
...@@ -141,24 +139,23 @@ static int remove_from_list(struct tdb_context *tdb, ...@@ -141,24 +139,23 @@ static int remove_from_list(struct tdb_context *tdb,
#ifdef CCAN_TDB2_DEBUG #ifdef CCAN_TDB2_DEBUG
if (tdb_read_off(tdb, off) & TDB_OFF_MASK != r_off) { if (tdb_read_off(tdb, off) & TDB_OFF_MASK != r_off) {
tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
"remove_from_list: %llu bad list %llu", "remove_from_list:"
" %llu bad list %llu",
(long long)r_off, (long long)b_off); (long long)r_off, (long long)b_off);
return -1;
} }
#endif #endif
ecode = tdb_write_off(tdb, off, r->magic_and_prev); ecode = tdb_write_off(tdb, off, r->magic_and_prev);
if (ecode != TDB_SUCCESS) { if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode; return ecode;
return -1;
} }
} }
return 0; return TDB_SUCCESS;
} }
/* Enqueue in this free bucket. */ /* Enqueue in this free bucket. */
static int enqueue_in_free(struct tdb_context *tdb, static enum TDB_ERROR enqueue_in_free(struct tdb_context *tdb,
tdb_off_t b_off, tdb_off_t b_off,
tdb_off_t off, tdb_off_t off,
tdb_len_t len) tdb_len_t len)
...@@ -176,8 +173,7 @@ static int enqueue_in_free(struct tdb_context *tdb, ...@@ -176,8 +173,7 @@ static int enqueue_in_free(struct tdb_context *tdb,
/* new->next = head. */ /* new->next = head. */
new.next = tdb_read_off(tdb, b_off); new.next = tdb_read_off(tdb, b_off);
if (TDB_OFF_IS_ERR(new.next)) { if (TDB_OFF_IS_ERR(new.next)) {
tdb->ecode = new.next; return new.next;
return -1;
} }
if (new.next) { if (new.next) {
...@@ -186,11 +182,11 @@ static int enqueue_in_free(struct tdb_context *tdb, ...@@ -186,11 +182,11 @@ static int enqueue_in_free(struct tdb_context *tdb,
new.next + offsetof(struct tdb_free_record, new.next + offsetof(struct tdb_free_record,
magic_and_prev)) magic_and_prev))
!= magic) { != magic) {
tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
"enqueue_in_free: %llu bad head" "enqueue_in_free: %llu bad head"
" prev %llu", " prev %llu",
(long long)new.next, (long long)b_off); (long long)new.next,
return -1; (long long)b_off);
} }
#endif #endif
/* next->prev = new. */ /* next->prev = new. */
...@@ -199,32 +195,24 @@ static int enqueue_in_free(struct tdb_context *tdb, ...@@ -199,32 +195,24 @@ static int enqueue_in_free(struct tdb_context *tdb,
magic_and_prev), magic_and_prev),
off | magic); off | magic);
if (ecode != TDB_SUCCESS) { if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode; return ecode;
return -1;
} }
} }
/* head = new */ /* head = new */
ecode = tdb_write_off(tdb, b_off, off); ecode = tdb_write_off(tdb, b_off, off);
if (ecode != TDB_SUCCESS) { if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode; return ecode;
return -1;
} }
ecode = tdb_write_convert(tdb, off, &new, sizeof(new)); return tdb_write_convert(tdb, off, &new, sizeof(new));
if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode;
return -1;
}
return 0;
} }
/* List need not be locked. */ /* List need not be locked. */
int add_free_record(struct tdb_context *tdb, enum TDB_ERROR add_free_record(struct tdb_context *tdb,
tdb_off_t off, tdb_len_t len_with_header) tdb_off_t off, tdb_len_t len_with_header)
{ {
tdb_off_t b_off; tdb_off_t b_off;
tdb_len_t len; tdb_len_t len;
int ret;
enum TDB_ERROR ecode; enum TDB_ERROR ecode;
assert(len_with_header >= sizeof(struct tdb_free_record)); assert(len_with_header >= sizeof(struct tdb_free_record));
...@@ -234,13 +222,12 @@ int add_free_record(struct tdb_context *tdb, ...@@ -234,13 +222,12 @@ int add_free_record(struct tdb_context *tdb,
b_off = bucket_off(tdb->ftable_off, size_to_bucket(len)); b_off = bucket_off(tdb->ftable_off, size_to_bucket(len));
ecode = tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT); ecode = tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT);
if (ecode != TDB_SUCCESS) { if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode; return ecode;
return -1;
} }
ret = enqueue_in_free(tdb, b_off, off, len); ecode = enqueue_in_free(tdb, b_off, off, len);
tdb_unlock_free_bucket(tdb, b_off); tdb_unlock_free_bucket(tdb, b_off);
return ret; return ecode;
} }
static size_t adjust_size(size_t keylen, size_t datalen) static size_t adjust_size(size_t keylen, size_t datalen)
...@@ -290,8 +277,9 @@ static tdb_off_t ftable_offset(struct tdb_context *tdb, unsigned int ftable) ...@@ -290,8 +277,9 @@ static tdb_off_t ftable_offset(struct tdb_context *tdb, unsigned int ftable)
} }
/* Note: we unlock the current bucket if we coalesce or fail. */ /* Note: we unlock the current bucket if we coalesce or fail. */
static int coalesce(struct tdb_context *tdb, static tdb_bool_err coalesce(struct tdb_context *tdb,
tdb_off_t off, tdb_off_t b_off, tdb_len_t data_len) tdb_off_t off, tdb_off_t b_off,
tdb_len_t data_len)
{ {
tdb_off_t end; tdb_off_t end;
struct tdb_free_record rec; struct tdb_free_record rec;
...@@ -307,7 +295,7 @@ static int coalesce(struct tdb_context *tdb, ...@@ -307,7 +295,7 @@ static int coalesce(struct tdb_context *tdb,
r = tdb_access_read(tdb, end, sizeof(*r), true); r = tdb_access_read(tdb, end, sizeof(*r), true);
if (TDB_PTR_IS_ERR(r)) { if (TDB_PTR_IS_ERR(r)) {
tdb->ecode = TDB_PTR_ERR(r); ecode = TDB_PTR_ERR(r);
goto err; goto err;
} }
...@@ -319,7 +307,13 @@ static int coalesce(struct tdb_context *tdb, ...@@ -319,7 +307,13 @@ static int coalesce(struct tdb_context *tdb,
ftable = frec_ftable(r); ftable = frec_ftable(r);
bucket = size_to_bucket(frec_len(r)); bucket = size_to_bucket(frec_len(r));
nb_off = bucket_off(ftable_offset(tdb, ftable), bucket); nb_off = ftable_offset(tdb, ftable);
if (TDB_OFF_IS_ERR(nb_off)) {
tdb_access_release(tdb, r);
ecode = nb_off;
goto err;
}
nb_off = bucket_off(nb_off, bucket);
tdb_access_release(tdb, r); tdb_access_release(tdb, r);
/* We may be violating lock order here, so best effort. */ /* We may be violating lock order here, so best effort. */
...@@ -332,7 +326,6 @@ static int coalesce(struct tdb_context *tdb, ...@@ -332,7 +326,6 @@ static int coalesce(struct tdb_context *tdb,
/* Now we have lock, re-check. */ /* Now we have lock, re-check. */
ecode = tdb_read_convert(tdb, end, &rec, sizeof(rec)); ecode = tdb_read_convert(tdb, end, &rec, sizeof(rec));
if (ecode != TDB_SUCCESS) { if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode;
tdb_unlock_free_bucket(tdb, nb_off); tdb_unlock_free_bucket(tdb, nb_off);
goto err; goto err;
} }
...@@ -350,7 +343,8 @@ static int coalesce(struct tdb_context *tdb, ...@@ -350,7 +343,8 @@ static int coalesce(struct tdb_context *tdb,
break; break;
} }
if (remove_from_list(tdb, nb_off, end, &rec) == -1) { ecode = remove_from_list(tdb, nb_off, end, &rec);
if (ecode != TDB_SUCCESS) {
tdb_unlock_free_bucket(tdb, nb_off); tdb_unlock_free_bucket(tdb, nb_off);
goto err; goto err;
} }
...@@ -362,24 +356,25 @@ static int coalesce(struct tdb_context *tdb, ...@@ -362,24 +356,25 @@ static int coalesce(struct tdb_context *tdb,
/* Didn't find any adjacent free? */ /* Didn't find any adjacent free? */
if (end == off + sizeof(struct tdb_used_record) + data_len) if (end == off + sizeof(struct tdb_used_record) + data_len)
return 0; return false;
/* OK, expand initial record */ /* OK, expand initial record */
ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec)); ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
if (ecode != TDB_SUCCESS) { if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode;
goto err; goto err;
} }
if (frec_len(&rec) != data_len) { if (frec_len(&rec) != data_len) {
tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
"coalesce: expected data len %zu not %zu", "coalesce: expected data len %zu not %zu",
(size_t)data_len, (size_t)frec_len(&rec)); (size_t)data_len, (size_t)frec_len(&rec));
goto err; goto err;
} }
if (remove_from_list(tdb, b_off, off, &rec) == -1) ecode = remove_from_list(tdb, b_off, off, &rec);
if (ecode != TDB_SUCCESS) {
goto err; goto err;
}
/* We have to drop this to avoid deadlocks, so make sure record /* We have to drop this to avoid deadlocks, so make sure record
* doesn't get coalesced by someone else! */ * doesn't get coalesced by someone else! */
...@@ -389,21 +384,23 @@ static int coalesce(struct tdb_context *tdb, ...@@ -389,21 +384,23 @@ static int coalesce(struct tdb_context *tdb,
ftable_and_len), ftable_and_len),
rec.ftable_and_len); rec.ftable_and_len);
if (ecode != TDB_SUCCESS) { if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode;
goto err; goto err;
} }
add_stat(tdb, alloc_coalesce_succeeded, 1); add_stat(tdb, alloc_coalesce_succeeded, 1);
tdb_unlock_free_bucket(tdb, b_off); tdb_unlock_free_bucket(tdb, b_off);
if (add_free_record(tdb, off, end - off) == -1) ecode = add_free_record(tdb, off, end - off);
return -1; if (ecode != TDB_SUCCESS) {
return 1; tdb->ecode = ecode;
return ecode;
}
return true;
err: err:
/* To unify error paths, we *always* unlock bucket on error. */ /* To unify error paths, we *always* unlock bucket on error. */
tdb_unlock_free_bucket(tdb, b_off); tdb_unlock_free_bucket(tdb, b_off);
return -1; return ecode;
} }
/* We need size bytes to put our key and data in. */ /* We need size bytes to put our key and data in. */
...@@ -429,8 +426,7 @@ again: ...@@ -429,8 +426,7 @@ again:
/* Lock this bucket. */ /* Lock this bucket. */
ecode = tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT); ecode = tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT);
if (ecode != TDB_SUCCESS) { if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode; return ecode;
return TDB_OFF_ERR;
} }
best.ftable_and_len = -1ULL; best.ftable_and_len = -1ULL;
...@@ -446,7 +442,7 @@ again: ...@@ -446,7 +442,7 @@ again:
* as we go. */ * as we go. */
off = tdb_read_off(tdb, b_off); off = tdb_read_off(tdb, b_off);
if (TDB_OFF_IS_ERR(off)) { if (TDB_OFF_IS_ERR(off)) {
tdb->ecode = off; ecode = off;
goto unlock_err; goto unlock_err;
} }
...@@ -454,18 +450,21 @@ again: ...@@ -454,18 +450,21 @@ again:
const struct tdb_free_record *r; const struct tdb_free_record *r;
tdb_len_t len; tdb_len_t len;
tdb_off_t next; tdb_off_t next;
int coal;
r = tdb_access_read(tdb, off, sizeof(*r), true); r = tdb_access_read(tdb, off, sizeof(*r), true);
if (TDB_PTR_IS_ERR(r)) { if (TDB_PTR_IS_ERR(r)) {
tdb->ecode = TDB_PTR_ERR(r); ecode = TDB_PTR_ERR(r);
goto unlock_err; goto unlock_err;
} }
if (frec_magic(r) != TDB_FREE_MAGIC) { if (frec_magic(r) != TDB_FREE_MAGIC) {
tdb_access_release(tdb, r); tdb_access_release(tdb, r);
tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
"lock_and_alloc: %llu non-free 0x%llx", "lock_and_alloc:"
(long long)off, (long long)r->magic_and_prev); " %llu non-free 0x%llx",
(long long)off,
(long long)r->magic_and_prev);
goto unlock_err; goto unlock_err;
} }
...@@ -486,14 +485,15 @@ again: ...@@ -486,14 +485,15 @@ again:
tdb_access_release(tdb, r); tdb_access_release(tdb, r);
/* Since we're going slow anyway, try coalescing here. */ /* Since we're going slow anyway, try coalescing here. */
switch (coalesce(tdb, off, b_off, len)) { coal = coalesce(tdb, off, b_off, len);
case -1: if (coal == 1) {
/* This has already unlocked on error. */
return -1;
case 1:
/* This has unlocked list, restart. */ /* This has unlocked list, restart. */
goto again; goto again;
} }
if (coal < 0) {
/* This has already unlocked on error. */
return coal;
}
off = next; off = next;
} }
...@@ -503,8 +503,10 @@ again: ...@@ -503,8 +503,10 @@ again:
size_t leftover; size_t leftover;
/* We're happy with this size: take it. */ /* We're happy with this size: take it. */
if (remove_from_list(tdb, b_off, best_off, &best) != 0) ecode = remove_from_list(tdb, b_off, best_off, &best);
if (ecode != TDB_SUCCESS) {
goto unlock_err; goto unlock_err;
}
leftover = record_leftover(keylen, datalen, want_extra, leftover = record_leftover(keylen, datalen, want_extra,
frec_len(&best)); frec_len(&best));
...@@ -512,13 +514,14 @@ again: ...@@ -512,13 +514,14 @@ again:
assert(keylen + datalen + leftover <= frec_len(&best)); assert(keylen + datalen + leftover <= frec_len(&best));
/* We need to mark non-free before we drop lock, otherwise /* We need to mark non-free before we drop lock, otherwise
* coalesce() could try to merge it! */ * coalesce() could try to merge it! */
if (set_header(tdb, &rec, magic, keylen, datalen, ecode = set_header(tdb, &rec, magic, keylen, datalen,
frec_len(&best) - leftover, hashlow) != 0) frec_len(&best) - leftover, hashlow);
if (ecode != TDB_SUCCESS) {
goto unlock_err; goto unlock_err;
}
ecode = tdb_write_convert(tdb, best_off, &rec, sizeof(rec)); ecode = tdb_write_convert(tdb, best_off, &rec, sizeof(rec));
if (ecode != TDB_SUCCESS) { if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode;
goto unlock_err; goto unlock_err;
} }
...@@ -526,11 +529,13 @@ again: ...@@ -526,11 +529,13 @@ again:
* locking is allowed. */ * locking is allowed. */
if (leftover) { if (leftover) {
add_stat(tdb, alloc_leftover, 1); add_stat(tdb, alloc_leftover, 1);
if (add_free_record(tdb, ecode = add_free_record(tdb,
best_off + sizeof(rec) best_off + sizeof(rec)
+ frec_len(&best) - leftover, + frec_len(&best) - leftover,
leftover)) leftover);
best_off = TDB_OFF_ERR; if (ecode != TDB_SUCCESS) {
best_off = ecode;
}
} }
tdb_unlock_free_bucket(tdb, b_off); tdb_unlock_free_bucket(tdb, b_off);
...@@ -542,10 +547,10 @@ again: ...@@ -542,10 +547,10 @@ again:
unlock_err: unlock_err:
tdb_unlock_free_bucket(tdb, b_off); tdb_unlock_free_bucket(tdb, b_off);
return TDB_OFF_ERR; return ecode;
} }
/* Get a free block from current free list, or 0 if none. */ /* Get a free block from current free list, or 0 if none, -ve on error. */
static tdb_off_t get_free(struct tdb_context *tdb, static tdb_off_t get_free(struct tdb_context *tdb,
size_t keylen, size_t datalen, bool want_extra, size_t keylen, size_t datalen, bool want_extra,
unsigned magic, unsigned hashlow) unsigned magic, unsigned hashlow)
...@@ -572,8 +577,8 @@ static tdb_off_t get_free(struct tdb_context *tdb, ...@@ -572,8 +577,8 @@ static tdb_off_t get_free(struct tdb_context *tdb,
off = lock_and_alloc(tdb, ftable_off, off = lock_and_alloc(tdb, ftable_off,
b, keylen, datalen, want_extra, b, keylen, datalen, want_extra,
magic, hashlow); magic, hashlow);
if (off == TDB_OFF_ERR) if (TDB_OFF_IS_ERR(off))
return TDB_OFF_ERR; return off;
if (off != 0) { if (off != 0) {
if (b == start_b) if (b == start_b)
add_stat(tdb, alloc_bucket_exact, 1); add_stat(tdb, alloc_bucket_exact, 1);
...@@ -588,15 +593,13 @@ static tdb_off_t get_free(struct tdb_context *tdb, ...@@ -588,15 +593,13 @@ static tdb_off_t get_free(struct tdb_context *tdb,
} }
if (TDB_OFF_IS_ERR(b)) { if (TDB_OFF_IS_ERR(b)) {
tdb->ecode = b; return b;
return 0;
} }
/* Hmm, try next table. */ /* Hmm, try next table. */
ftable_off = next_ftable(tdb, ftable_off); ftable_off = next_ftable(tdb, ftable_off);
if (TDB_OFF_IS_ERR(ftable_off)) { if (TDB_OFF_IS_ERR(ftable_off)) {
tdb->ecode = ftable_off; return ftable_off;
return 0;
} }
ftable++; ftable++;
...@@ -604,8 +607,7 @@ static tdb_off_t get_free(struct tdb_context *tdb, ...@@ -604,8 +607,7 @@ static tdb_off_t get_free(struct tdb_context *tdb,
wrapped = true; wrapped = true;
ftable_off = first_ftable(tdb); ftable_off = first_ftable(tdb);
if (TDB_OFF_IS_ERR(ftable_off)) { if (TDB_OFF_IS_ERR(ftable_off)) {
tdb->ecode = ftable_off; return ftable_off;
return 0;
} }
ftable = 0; ftable = 0;
} }
...@@ -614,7 +616,7 @@ static tdb_off_t get_free(struct tdb_context *tdb, ...@@ -614,7 +616,7 @@ static tdb_off_t get_free(struct tdb_context *tdb,
return 0; return 0;
} }
int set_header(struct tdb_context *tdb, enum TDB_ERROR set_header(struct tdb_context *tdb,
struct tdb_used_record *rec, struct tdb_used_record *rec,
unsigned magic, uint64_t keylen, uint64_t datalen, unsigned magic, uint64_t keylen, uint64_t datalen,
uint64_t actuallen, unsigned hashlow) uint64_t actuallen, unsigned hashlow)
...@@ -632,17 +634,16 @@ int set_header(struct tdb_context *tdb, ...@@ -632,17 +634,16 @@ int set_header(struct tdb_context *tdb,
if (rec_key_length(rec) != keylen if (rec_key_length(rec) != keylen
|| rec_data_length(rec) != datalen || rec_data_length(rec) != datalen
|| rec_extra_padding(rec) != actuallen - (keylen + datalen)) { || rec_extra_padding(rec) != actuallen - (keylen + datalen)) {
tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
"Could not encode k=%llu,d=%llu,a=%llu", "Could not encode k=%llu,d=%llu,a=%llu",
(long long)keylen, (long long)datalen, (long long)keylen, (long long)datalen,
(long long)actuallen); (long long)actuallen);
return -1;
} }
return 0; return TDB_SUCCESS;
} }
/* Expand the database. */ /* Expand the database. */
static int tdb_expand(struct tdb_context *tdb, tdb_len_t size) static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size)
{ {
uint64_t old_size; uint64_t old_size;
tdb_len_t wanted; tdb_len_t wanted;
...@@ -654,9 +655,8 @@ static int tdb_expand(struct tdb_context *tdb, tdb_len_t size) ...@@ -654,9 +655,8 @@ static int tdb_expand(struct tdb_context *tdb, tdb_len_t size)
/* Need to hold a hash lock to expand DB: transactions rely on it. */ /* Need to hold a hash lock to expand DB: transactions rely on it. */
if (!(tdb->flags & TDB_NOLOCK) if (!(tdb->flags & TDB_NOLOCK)
&& !tdb->allrecord_lock.count && !tdb_has_hash_locks(tdb)) { && !tdb->allrecord_lock.count && !tdb_has_hash_locks(tdb)) {
tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR, return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
"tdb_expand: must hold lock during expand"); "tdb_expand: must hold lock during expand");
return -1;
} }
/* always make room for at least 100 more records, and at /* always make room for at least 100 more records, and at
...@@ -670,8 +670,7 @@ static int tdb_expand(struct tdb_context *tdb, tdb_len_t size) ...@@ -670,8 +670,7 @@ static int tdb_expand(struct tdb_context *tdb, tdb_len_t size)
/* Only one person can expand file at a time. */ /* Only one person can expand file at a time. */
ecode = tdb_lock_expand(tdb, F_WRLCK); ecode = tdb_lock_expand(tdb, F_WRLCK);
if (ecode != TDB_SUCCESS) { if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode; return ecode;
return -1;
} }
/* Someone else may have expanded the file, so retry. */ /* Someone else may have expanded the file, so retry. */
...@@ -679,14 +678,13 @@ static int tdb_expand(struct tdb_context *tdb, tdb_len_t size) ...@@ -679,14 +678,13 @@ static int tdb_expand(struct tdb_context *tdb, tdb_len_t size)
tdb->methods->oob(tdb, tdb->map_size + 1, true); tdb->methods->oob(tdb, tdb->map_size + 1, true);
if (tdb->map_size != old_size) { if (tdb->map_size != old_size) {
tdb_unlock_expand(tdb, F_WRLCK); tdb_unlock_expand(tdb, F_WRLCK);
return 0; return TDB_SUCCESS;
} }
ecode = tdb->methods->expand_file(tdb, wanted); ecode = tdb->methods->expand_file(tdb, wanted);
if (ecode != TDB_SUCCESS) { if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode;
tdb_unlock_expand(tdb, F_WRLCK); tdb_unlock_expand(tdb, F_WRLCK);
return -1; return ecode;
} }
/* We need to drop this lock before adding free record. */ /* We need to drop this lock before adding free record. */
...@@ -706,12 +704,15 @@ tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen, ...@@ -706,12 +704,15 @@ tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen,
assert(!tdb->direct_access); assert(!tdb->direct_access);
for (;;) { for (;;) {
enum TDB_ERROR ecode;
off = get_free(tdb, keylen, datalen, growing, magic, hash); off = get_free(tdb, keylen, datalen, growing, magic, hash);
if (likely(off != 0)) if (likely(off != 0))
break; break;
if (tdb_expand(tdb, adjust_size(keylen, datalen))) ecode = tdb_expand(tdb, adjust_size(keylen, datalen));
return TDB_OFF_ERR; if (ecode != TDB_SUCCESS) {
return ecode;
}
} }
return off; return off;
......
...@@ -448,8 +448,8 @@ static enum TDB_ERROR COLD add_to_chain(struct tdb_context *tdb, ...@@ -448,8 +448,8 @@ static enum TDB_ERROR COLD add_to_chain(struct tdb_context *tdb,
if (!next) { if (!next) {
next = alloc(tdb, 0, sizeof(struct tdb_chain), 0, next = alloc(tdb, 0, sizeof(struct tdb_chain), 0,
TDB_CHAIN_MAGIC, false); TDB_CHAIN_MAGIC, false);
if (next == TDB_OFF_ERR) if (TDB_OFF_IS_ERR(next))
return tdb->ecode; return next;
ecode = zero_out(tdb, ecode = zero_out(tdb,
next+sizeof(struct tdb_used_record), next+sizeof(struct tdb_used_record),
sizeof(struct tdb_chain)); sizeof(struct tdb_chain));
...@@ -521,8 +521,8 @@ static enum TDB_ERROR expand_group(struct tdb_context *tdb, struct hash_info *h) ...@@ -521,8 +521,8 @@ static enum TDB_ERROR expand_group(struct tdb_context *tdb, struct hash_info *h)
} }
subhash = alloc(tdb, 0, subsize, 0, magic, false); subhash = alloc(tdb, 0, subsize, 0, magic, false);
if (subhash == TDB_OFF_ERR) { if (TDB_OFF_IS_ERR(subhash)) {
return tdb->ecode; return subhash;
} }
ecode = zero_out(tdb, subhash + sizeof(struct tdb_used_record), ecode = zero_out(tdb, subhash + sizeof(struct tdb_used_record),
......
...@@ -73,7 +73,6 @@ typedef uint64_t tdb_off_t; ...@@ -73,7 +73,6 @@ typedef uint64_t tdb_off_t;
#define TDB_RECOVERY_MAGIC (0xf53bc0e7ad124589ULL) #define TDB_RECOVERY_MAGIC (0xf53bc0e7ad124589ULL)
#define TDB_RECOVERY_INVALID_MAGIC (0x0ULL) #define TDB_RECOVERY_INVALID_MAGIC (0x0ULL)
#define TDB_OFF_ERR ((tdb_off_t)-1)
#define TDB_OFF_IS_ERR(off) unlikely(off >= (tdb_off_t)TDB_ERR_LAST) #define TDB_OFF_IS_ERR(off) unlikely(off >= (tdb_off_t)TDB_ERR_LAST)
/* Packing errors into pointers and v.v. */ /* Packing errors into pointers and v.v. */
...@@ -429,22 +428,22 @@ enum TDB_ERROR delete_from_hash(struct tdb_context *tdb, struct hash_info *h); ...@@ -429,22 +428,22 @@ enum TDB_ERROR delete_from_hash(struct tdb_context *tdb, struct hash_info *h);
bool is_subhash(tdb_off_t val); bool is_subhash(tdb_off_t val);
/* free.c: */ /* free.c: */
int tdb_ftable_init(struct tdb_context *tdb); enum TDB_ERROR tdb_ftable_init(struct tdb_context *tdb);
/* check.c needs these to iterate through free lists. */ /* check.c needs these to iterate through free lists. */
tdb_off_t first_ftable(struct tdb_context *tdb); tdb_off_t first_ftable(struct tdb_context *tdb);
tdb_off_t next_ftable(struct tdb_context *tdb, tdb_off_t ftable); tdb_off_t next_ftable(struct tdb_context *tdb, tdb_off_t ftable);
/* This returns space or TDB_OFF_ERR. */ /* This returns space or -ve error number. */
tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen, tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen,
uint64_t hash, unsigned magic, bool growing); uint64_t hash, unsigned magic, bool growing);
/* Put this record in a free list. */ /* Put this record in a free list. */
int add_free_record(struct tdb_context *tdb, enum TDB_ERROR add_free_record(struct tdb_context *tdb,
tdb_off_t off, tdb_len_t len_with_header); tdb_off_t off, tdb_len_t len_with_header);
/* Set up header for a used/ftable/htable/chain record. */ /* Set up header for a used/ftable/htable/chain record. */
int set_header(struct tdb_context *tdb, enum TDB_ERROR set_header(struct tdb_context *tdb,
struct tdb_used_record *rec, struct tdb_used_record *rec,
unsigned magic, uint64_t keylen, uint64_t datalen, unsigned magic, uint64_t keylen, uint64_t datalen,
uint64_t actuallen, unsigned hashlow); uint64_t actuallen, unsigned hashlow);
......
...@@ -117,9 +117,13 @@ static int tdb_new_database(struct tdb_context *tdb, ...@@ -117,9 +117,13 @@ static int tdb_new_database(struct tdb_context *tdb,
/* Free is empty. */ /* Free is empty. */
newdb.hdr.free_table = offsetof(struct new_database, ftable); newdb.hdr.free_table = offsetof(struct new_database, ftable);
memset(&newdb.ftable, 0, sizeof(newdb.ftable)); memset(&newdb.ftable, 0, sizeof(newdb.ftable));
set_header(NULL, &newdb.ftable.hdr, TDB_FTABLE_MAGIC, 0, tdb->ecode = set_header(NULL, &newdb.ftable.hdr, TDB_FTABLE_MAGIC, 0,
sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr), sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr), 0); sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
0);
if (tdb->ecode != TDB_SUCCESS) {
return -1;
}
/* Magic food */ /* Magic food */
memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food)); memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food));
...@@ -354,8 +358,10 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags, ...@@ -354,8 +358,10 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
} }
} }
if (tdb_ftable_init(tdb) == -1) tdb->ecode = tdb_ftable_init(tdb);
if (tdb->ecode != TDB_SUCCESS) {
goto fail; goto fail;
}
tdb->next = tdbs; tdb->next = tdbs;
tdbs = tdb; tdbs = tdb;
...@@ -415,9 +421,12 @@ static int update_rec_hdr(struct tdb_context *tdb, ...@@ -415,9 +421,12 @@ static int update_rec_hdr(struct tdb_context *tdb,
uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec); uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec);
enum TDB_ERROR ecode; enum TDB_ERROR ecode;
if (set_header(tdb, rec, TDB_USED_MAGIC, keylen, datalen, ecode = set_header(tdb, rec, TDB_USED_MAGIC, keylen, datalen,
keylen + dataroom, h)) keylen + dataroom, h);
if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode;
return -1; return -1;
}
ecode = tdb_write_convert(tdb, off, rec, sizeof(*rec)); ecode = tdb_write_convert(tdb, off, rec, sizeof(*rec));
if (ecode != TDB_SUCCESS) { if (ecode != TDB_SUCCESS) {
...@@ -440,15 +449,18 @@ static int replace_data(struct tdb_context *tdb, ...@@ -440,15 +449,18 @@ static int replace_data(struct tdb_context *tdb,
/* Allocate a new record. */ /* Allocate a new record. */
new_off = alloc(tdb, key.dsize, dbuf.dsize, h->h, TDB_USED_MAGIC, new_off = alloc(tdb, key.dsize, dbuf.dsize, h->h, TDB_USED_MAGIC,
growing); growing);
if (unlikely(new_off == TDB_OFF_ERR)) if (TDB_OFF_IS_ERR(new_off)) {
tdb->ecode = new_off;
return -1; return -1;
}
/* We didn't like the existing one: remove it. */ /* We didn't like the existing one: remove it. */
if (old_off) { if (old_off) {
add_stat(tdb, frees, 1); add_stat(tdb, frees, 1);
add_free_record(tdb, old_off, ecode = add_free_record(tdb, old_off,
sizeof(struct tdb_used_record) sizeof(struct tdb_used_record)
+ key.dsize + old_room); + key.dsize + old_room);
if (ecode == TDB_SUCCESS)
ecode = replace_in_hash(tdb, h, new_off); ecode = replace_in_hash(tdb, h, new_off);
} else { } else {
ecode = add_to_hash(tdb, h, new_off); ecode = add_to_hash(tdb, h, new_off);
...@@ -676,12 +688,15 @@ int tdb_delete(struct tdb_context *tdb, struct tdb_data key) ...@@ -676,12 +688,15 @@ int tdb_delete(struct tdb_context *tdb, struct tdb_data key)
/* Free the deleted entry. */ /* Free the deleted entry. */
add_stat(tdb, frees, 1); add_stat(tdb, frees, 1);
if (add_free_record(tdb, off, ecode = add_free_record(tdb, off,
sizeof(struct tdb_used_record) sizeof(struct tdb_used_record)
+ rec_key_length(&rec) + rec_key_length(&rec)
+ rec_data_length(&rec) + rec_data_length(&rec)
+ rec_extra_padding(&rec)) != 0) + rec_extra_padding(&rec));
if (ecode != TDB_SUCCESS) {
tdb->ecode = ecode;
goto unlock_err; goto unlock_err;
}
tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK); tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
return 0; return 0;
......
...@@ -11,8 +11,8 @@ ...@@ -11,8 +11,8 @@
#include "logging.h" #include "logging.h"
/* FIXME: Check these! */ /* FIXME: Check these! */
#define INITIAL_TDB_MALLOC "tdb.c", 178, FAILTEST_MALLOC #define INITIAL_TDB_MALLOC "tdb.c", 182, FAILTEST_MALLOC
#define LOGGING_MALLOC "tdb.c", 734, FAILTEST_MALLOC #define LOGGING_MALLOC "tdb.c", 792, FAILTEST_MALLOC
#define URANDOM_OPEN "tdb.c", 49, FAILTEST_OPEN #define URANDOM_OPEN "tdb.c", 49, FAILTEST_OPEN
#define URANDOM_READ "tdb.c", 29, FAILTEST_READ #define URANDOM_READ "tdb.c", 29, FAILTEST_READ
......
...@@ -67,7 +67,7 @@ int main(int argc, char *argv[]) ...@@ -67,7 +67,7 @@ int main(int argc, char *argv[])
/* Allocate a new record. */ /* Allocate a new record. */
new_off = alloc(tdb, key.dsize, dbuf.dsize, h.h, new_off = alloc(tdb, key.dsize, dbuf.dsize, h.h,
TDB_USED_MAGIC, false); TDB_USED_MAGIC, false);
ok1(new_off != TDB_OFF_ERR); ok1(!TDB_OFF_IS_ERR(new_off));
/* We should be able to add it now. */ /* We should be able to add it now. */
ok1(add_to_hash(tdb, &h, new_off) == 0); ok1(add_to_hash(tdb, &h, new_off) == 0);
...@@ -228,7 +228,7 @@ int main(int argc, char *argv[]) ...@@ -228,7 +228,7 @@ int main(int argc, char *argv[])
/* Allocate a new record. */ /* Allocate a new record. */
new_off = alloc(tdb, key.dsize, dbuf.dsize, h.h, new_off = alloc(tdb, key.dsize, dbuf.dsize, h.h,
TDB_USED_MAGIC, false); TDB_USED_MAGIC, false);
ok1(new_off != TDB_OFF_ERR); ok1(!TDB_OFF_IS_ERR(new_off));
ok1(add_to_hash(tdb, &h, new_off) == 0); ok1(add_to_hash(tdb, &h, new_off) == 0);
/* Make sure we fill it in for later finding. */ /* Make sure we fill it in for later finding. */
......
...@@ -681,9 +681,10 @@ static int tdb_recovery_allocate(struct tdb_context *tdb, ...@@ -681,9 +681,10 @@ static int tdb_recovery_allocate(struct tdb_context *tdb,
the transaction) */ the transaction) */
if (recovery_head != 0) { if (recovery_head != 0) {
add_stat(tdb, frees, 1); add_stat(tdb, frees, 1);
if (add_free_record(tdb, recovery_head, ecode = add_free_record(tdb, recovery_head,
sizeof(rec) + rec.max_len) != 0) { sizeof(rec) + rec.max_len);
tdb_logerr(tdb, tdb->ecode, TDB_LOG_ERROR, if (ecode != TDB_SUCCESS) {
tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
"tdb_recovery_allocate:" "tdb_recovery_allocate:"
" failed to free previous recovery area"); " failed to free previous recovery area");
return -1; return -1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment