Commit ebdd6451 authored by Rusty Russell

tdb2: now checking a new empty database works.

parent 142afe32
......@@ -74,6 +74,7 @@ int main(int argc, char *argv[])
printf("ccan/hash\n");
printf("ccan/likely\n");
printf("ccan/asearch\n");
printf("ccan/build_assert\n");
return 0;
}
......
......@@ -187,7 +187,8 @@ static bool check_hash_list(struct tdb_context *tdb,
num_nonzero++;
}
if (num_found != num_used) {
/* free table and hash table are two of the used blocks. */
if (num_found != num_used - 2) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"tdb_check: Not all entries are in hash\n");
return false;
......@@ -322,11 +323,10 @@ int tdb_check(struct tdb_context *tdb,
size_t num_free = 0, num_used = 0;
bool hash_found = false, free_found = false;
/* This always ensures the header is up to date. */
if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0)
return -1;
update_header(tdb);
if (!check_header(tdb))
goto fail;
......@@ -403,9 +403,9 @@ int tdb_check(struct tdb_context *tdb,
goto fail;
tdb_allrecord_unlock(tdb, F_RDLCK);
return true;
return 0;
fail:
tdb_allrecord_unlock(tdb, F_RDLCK);
return false;
return -1;
}
......@@ -125,11 +125,8 @@ static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len)
/* Either make a copy into pad and return that, or return ptr into mmap. */
/* Note: pad has to be a real object, so we can't get here if len
* overflows size_t */
/* FIXME: Transaction */
void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len)
{
ssize_t r;
if (likely(!(tdb->flags & TDB_CONVERT))) {
void *ret = tdb_direct(tdb, off, len);
if (ret)
......@@ -139,18 +136,8 @@ void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len)
if (unlikely(tdb_oob(tdb, off + len, false) == -1))
return NULL;
r = pread(tdb->fd, pad, len, off);
if (r != (ssize_t)len) {
/* Ensure ecode is set for log fn. */
tdb->ecode = TDB_ERR_IO;
tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
"tdb_read failed at %llu "
"len=%lld ret=%lld (%s) map_size=%lld\n",
(long long)off, (long long)len,
(long long)r, strerror(errno),
(long long)tdb->map_size);
if (tdb->methods->read(tdb, off, pad, len) == -1)
return NULL;
}
return tdb_convert(tdb, pad, len);
}
......@@ -249,7 +236,7 @@ tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
{
tdb_off_t pad, *ret;
ret = tdb_get(tdb, off, &pad, sizeof(ret));
ret = tdb_get(tdb, off, &pad, sizeof(pad));
if (!ret) {
return TDB_OFF_ERR;
}
......@@ -260,7 +247,7 @@ tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off)
{
while (len) {
size_t ret;
ssize_t ret;
ret = pwrite(fd, buf, len, off);
if (ret < 0)
return false;
......@@ -268,13 +255,51 @@ bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off)
errno = ENOSPC;
return false;
}
buf += ret;
buf = (char *)buf + ret;
off += ret;
len -= ret;
}
return true;
}
/*
 * Read exactly len bytes from fd at offset off into buf.
 *
 * Even on files, we can get partial reads due to signals, so keep calling
 * pread() until the whole range has arrived.
 *
 * Returns true once all len bytes have been read (trivially when len is 0).
 * Returns false if pread() fails (errno as set by pread()), or if end-of-file
 * is reached before len bytes were read (errno is set to EWOULDBLOCK).
 * A pread() interrupted by a signal before transferring any data (EINTR)
 * is retried rather than reported as a failure.
 */
bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off)
{
	char *dst = buf;

	while (len) {
		ssize_t ret = pread(fd, dst, len, off);

		if (ret < 0) {
			/* Interrupted before any data arrived: just retry. */
			if (errno == EINTR)
				continue;
			return false;
		}
		if (ret == 0) {
			/* ETOOSHORT? */
			errno = EWOULDBLOCK;
			return false;
		}
		/* Partial read: advance past what we got and go again. */
		dst += ret;
		off += ret;
		len -= (size_t)ret;
	}
	return true;
}
/*
 * Read exactly len bytes from the current position of fd into buf,
 * looping over short reads.
 *
 * Returns true once all len bytes have been read (trivially when len is 0).
 * Returns false if read() fails (errno as set by read()), or if end-of-file
 * is reached before len bytes were read (errno is set to EWOULDBLOCK).
 * A read() interrupted by a signal before transferring any data (EINTR)
 * is retried rather than reported as a failure.
 */
bool tdb_read_all(int fd, void *buf, size_t len)
{
	char *dst = buf;

	while (len) {
		ssize_t ret = read(fd, dst, len);

		if (ret < 0) {
			/* Interrupted before any data arrived: just retry. */
			if (errno == EINTR)
				continue;
			return false;
		}
		if (ret == 0) {
			/* ETOOSHORT? */
			errno = EWOULDBLOCK;
			return false;
		}
		/* Partial read: advance past what we got and go again. */
		dst += ret;
		len -= (size_t)ret;
	}
	return true;
}
/* write a lump of data at a specified offset */
static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
const void *buf, tdb_len_t len)
......@@ -316,15 +341,14 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
if (tdb->map_ptr) {
memcpy(buf, off + (char *)tdb->map_ptr, len);
} else {
ssize_t ret = pread(tdb->fd, buf, len, off);
if (ret != (ssize_t)len) {
if (!tdb_pread_all(tdb->fd, buf, len, off)) {
/* Ensure ecode is set for log fn. */
tdb->ecode = TDB_ERR_IO;
tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
"tdb_read failed at %lld "
"len=%lld ret=%lld (%s) map_size=%lld\n",
"len=%lld (%s) map_size=%lld\n",
(long long)off, (long long)len,
(long long)ret, strerror(errno),
strerror(errno),
(long long)tdb->map_size);
return -1;
}
......@@ -376,17 +400,17 @@ uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off)
void *key;
uint64_t klen, hash;
r = tdb_get(tdb, off, &pad, sizeof(*r));
r = tdb_get(tdb, off, &pad, sizeof(pad));
if (!r)
/* FIXME */
return 0;
klen = rec_key_length(r);
key = tdb_direct(tdb, off + sizeof(*r), klen);
key = tdb_direct(tdb, off + sizeof(pad), klen);
if (likely(key))
return tdb_hash(tdb, key, klen);
key = tdb_alloc_read(tdb, off + sizeof(*r), klen);
key = tdb_alloc_read(tdb, off + sizeof(pad), klen);
if (unlikely(!key))
return 0;
hash = tdb_hash(tdb, key, klen);
......
......@@ -436,7 +436,8 @@ static int tdb_lock_gradual(struct tdb_context *tdb,
/* lock/unlock entire database. It can only be upgradable if you have some
* other way of guaranteeing exclusivity (ie. transaction write lock).
* Note that we don't lock the free chains: no one can get those locks
* without a hash chain lock first. */
* without a hash chain lock first.
* The header *will be* up to date once this returns success. */
int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
enum tdb_lock_flags flags, bool upgradable)
{
......@@ -494,27 +495,27 @@ again:
return -1;
}
tdb->allrecord_lock.count = 1;
/* If it's upgradable, it's actually exclusive so we can treat
* it as a write lock. */
tdb->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
tdb->allrecord_lock.off = upgradable;
/* Now we re-check header, holding lock. */
if (unlikely(update_header(tdb))) {
tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, hash_size);
tdb_allrecord_unlock(tdb, ltype);
goto again;
}
/* Now check for needing recovery. */
if (unlikely(tdb_needs_recovery(tdb))) {
tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, hash_size);
tdb_allrecord_unlock(tdb, ltype);
if (tdb_lock_and_recover(tdb) == -1) {
return -1;
}
goto again;
}
tdb->allrecord_lock.count = 1;
/* If it's upgradable, it's actually exclusive so we can treat
* it as a write lock. */
tdb->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
tdb->allrecord_lock.off = upgradable;
return 0;
}
......
......@@ -83,10 +83,10 @@ typedef uint64_t tdb_off_t;
/* Hash chain locks. */
#define TDB_HASH_LOCK_START 2
/* We start with 256 hash buckets, 10 free buckets. A 1k-sized zone. */
/* We start with 256 hash buckets, 10 free buckets. A 4k-sized zone. */
#define INITIAL_HASH_BITS 8
#define INITIAL_FREE_BUCKETS 10
#define INITIAL_ZONE_BITS 10
#define INITIAL_ZONE_BITS 12
#if !HAVE_BSWAP_64
static inline uint64_t bswap_64(uint64_t x)
......@@ -328,6 +328,8 @@ tdb_off_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
/* Even on files, we can get partial writes due to signals. */
bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off);
bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off);
bool tdb_read_all(int fd, void *buf, size_t len);
/* Allocate and make a copy of some offset. */
void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len);
......
#include "private.h"
#include <ccan/tdb2/tdb2.h>
#include <ccan/hash/hash.h>
#include <ccan/build_assert/build_assert.h>
#include <ccan/likely/likely.h>
#include <assert.h>
......@@ -48,7 +49,7 @@ bool update_header(struct tdb_context *tdb)
/*
 * Hash `length` bytes starting at `key`, mixed with `seed`.
 * `arg` is accepted to fit the hash-callback signature but is not used.
 *
 * NOTE(review): two consecutive return statements follow, so as written
 * only the first (hash64_any) is reachable. Presumably these are the
 * removed and added lines of a diff hunk -- confirm which one is intended
 * and drop the other.
 */
static uint64_t jenkins_hash(const void *key, size_t length, uint64_t seed,
void *arg)
{
return hash64_any(key, length, seed);
return hash64_stable((const unsigned char *)key, length, seed);
}
uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len)
......@@ -77,7 +78,7 @@ static uint64_t random_number(struct tdb_context *tdb)
fd = open("/dev/urandom", O_RDONLY);
if (fd >= 0) {
if (read(fd, &ret, sizeof(ret)) == sizeof(ret)) {
if (tdb_read_all(fd, &ret, sizeof(ret))) {
tdb->log(tdb, TDB_DEBUG_TRACE, tdb->log_priv,
"tdb_open: random from /dev/urandom\n");
close(fd);
......@@ -130,6 +131,7 @@ static int tdb_new_database(struct tdb_context *tdb)
{
/* We make it up in memory, then write it out if not internal */
struct new_database newdb;
unsigned int magic_off = offsetof(struct tdb_header, magic_food);
/* Fill in the header */
newdb.hdr.version = TDB_VERSION;
......@@ -142,6 +144,9 @@ static int tdb_new_database(struct tdb_context *tdb)
newdb.hdr.v.generation = 0;
/* The initial zone must cover the initial database size! */
BUILD_ASSERT((1ULL << INITIAL_ZONE_BITS) >= sizeof(newdb));
/* Free array has 1 zone, 10 buckets. All buckets empty. */
newdb.hdr.v.num_zones = 1;
newdb.hdr.v.zone_bits = INITIAL_ZONE_BITS;
......@@ -158,6 +163,17 @@ static int tdb_new_database(struct tdb_context *tdb)
sizeof(newdb.hash), sizeof(newdb.hash), 0);
memset(newdb.hash, 0, sizeof(newdb.hash));
/* Magic food */
memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food));
strcpy(newdb.hdr.magic_food, TDB_MAGIC_FOOD);
/* This creates an endian-converted database, as if read from disk */
tdb_convert(tdb,
(char *)&newdb.hdr + magic_off,
sizeof(newdb) - magic_off);
tdb->header = newdb.hdr;
if (tdb->flags & TDB_INTERNAL) {
tdb->map_size = sizeof(newdb);
tdb->map_ptr = malloc(tdb->map_size);
......@@ -166,9 +182,6 @@ static int tdb_new_database(struct tdb_context *tdb)
return -1;
}
memcpy(tdb->map_ptr, &newdb, tdb->map_size);
tdb->header = newdb.hdr;
/* Convert the `ondisk' version if asked. */
tdb_convert(tdb, tdb->map_ptr, sizeof(newdb));
return 0;
}
if (lseek(tdb->fd, 0, SEEK_SET) == -1)
......@@ -177,14 +190,6 @@ static int tdb_new_database(struct tdb_context *tdb)
if (ftruncate(tdb->fd, 0) == -1)
return -1;
/* This creates an endian-converted header, as if read from disk */
tdb->header = newdb.hdr;
tdb_convert(tdb, &tdb->header, sizeof(tdb->header));
/* Don't endian-convert the magic food! */
memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food));
strcpy(newdb.hdr.magic_food, TDB_MAGIC_FOOD);
if (!tdb_pwrite_all(tdb->fd, &newdb, sizeof(newdb), 0)) {
tdb->ecode = TDB_ERR_IO;
return -1;
......@@ -215,6 +220,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
tdb->log_priv = NULL;
tdb->khash = jenkins_hash;
tdb->hash_priv = NULL;
tdb_io_init(tdb);
/* FIXME */
if (attr) {
......@@ -246,6 +252,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
goto fail;
}
TEST_IT(tdb->flags & TDB_CONVERT);
tdb_convert(tdb, &tdb->header, sizeof(tdb->header));
goto internal;
}
......@@ -268,8 +275,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
goto fail; /* errno set by tdb_brlock */
}
errno = 0;
if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header)
if (!tdb_pread_all(tdb->fd, &tdb->header, sizeof(tdb->header), 0)
|| strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0) {
if (!(open_flags & O_CREAT) || tdb_new_database(tdb) == -1) {
if (errno == 0) {
......
......@@ -52,8 +52,7 @@ extern "C" {
#define TDB_INTERNAL 2 /* don't store on disk */
#define TDB_NOLOCK 4 /* don't do any locking */
#define TDB_NOMMAP 8 /* don't use mmap */
#define TDB_CONVERT 16 /* convert endian (internal use) */
#define TDB_BIGENDIAN 32 /* header is big-endian (internal use) */
#define TDB_CONVERT 16 /* convert endian */
#define TDB_NOSYNC 64 /* don't use synchronous transactions */
#define TDB_SEQNUM 128 /* maintain a sequence number */
#define TDB_VOLATILE 256 /* Activate the per-hashchain freelist, default 5 */
......
......@@ -3,14 +3,15 @@
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tap/tap.h>
#include "logging.h"
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_used_record rec;
struct tdb_context tdb = { .log = null_log_fn, .log_priv = NULL };
struct tdb_context tdb = { .log = tap_log_fn, .log_priv = NULL };
plan_tests(64 + 32 + 48*6);
plan_tests(64 + 32 + 48*6 + 1);
/* We should be able to encode any data value. */
for (i = 0; i < 64; i++)
......@@ -36,5 +37,6 @@ int main(int argc, char *argv[])
ok1(rec_hash(&rec) == h);
ok1(rec_magic(&rec) == TDB_MAGIC);
}
ok1(tap_log_messages == 0);
return exit_status();
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment