Commit 0a700890 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Kick off btree node writes from write completions

This improves performance by removing the need to wait for the in-flight
btree write to complete before kicking off the next one. It will also be
needed to avoid a performance regression with the upcoming patch that
updates btree ptrs after every btree write.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent 2680325b
......@@ -234,7 +234,7 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
if (bch2_verify_btree_ondisk)
bch2_btree_node_write(c, b, SIX_LOCK_intent);
else
__bch2_btree_node_write(c, b);
__bch2_btree_node_write(c, b, false);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
......@@ -1009,7 +1009,7 @@ void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k)
six_lock_write(&b->c.lock, NULL, NULL);
if (btree_node_dirty(b)) {
__bch2_btree_node_write(c, b);
__bch2_btree_node_write(c, b, false);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
goto wait_on_io;
......
......@@ -1562,9 +1562,47 @@ void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
/*
 * Completion handler for a btree node write.
 *
 * Completes the previous write on @b, then does one of:
 *  - no further write requested: clear BTREE_NODE_write_in_flight and wake
 *    anyone waiting on that bit;
 *  - BTREE_NODE_need_write is set: take a read lock on the node, re-check the
 *    flags, and either start the next write directly from here or clear
 *    BTREE_NODE_write_in_flight and wake waiters.
 *
 * @c: filesystem the node belongs to
 * @b: btree node whose write just completed
 */
static void btree_node_write_done(struct bch_fs *c, struct btree *b)
{
	struct btree_write *w = btree_prev_write(b);
	unsigned long old, new, v;

	bch2_btree_complete_write(c, b, w);
	/*
	 * NOTE(review): this listing is a diff rendered without +/- markers;
	 * this call may be a line the change removes - confirm against the
	 * tree before relying on it.
	 */
	bch2_btree_node_io_unlock(b);

	/* Fast path: nobody asked for another write, just drop write_in_flight. */
	v = READ_ONCE(b->flags);
	do {
		old = new = v;

		if (old & (1U << BTREE_NODE_need_write))
			goto do_write;

		new &= ~(1U << BTREE_NODE_write_in_flight);
	} while ((v = cmpxchg(&b->flags, old, new)) != old);

	wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
	return;

do_write:
	six_lock_read(&b->c.lock, NULL, NULL);

	/*
	 * Re-evaluate under the read lock: only start a new write if the node
	 * is still dirty, still wants a write, is not marked never_write and
	 * btree_node_may_write() allows it.
	 */
	v = READ_ONCE(b->flags);
	do {
		old = new = v;

		if ((old & (1U << BTREE_NODE_dirty)) &&
		    (old & (1U << BTREE_NODE_need_write)) &&
		    !(old & (1U << BTREE_NODE_never_write)) &&
		    btree_node_may_write(b)) {
			new &= ~(1U << BTREE_NODE_dirty);
			new &= ~(1U << BTREE_NODE_need_write);
			new |=  (1U << BTREE_NODE_write_in_flight);
			new |=  (1U << BTREE_NODE_just_written);
			new ^=  (1U << BTREE_NODE_write_idx);
		} else {
			new &= ~(1U << BTREE_NODE_write_in_flight);
		}
	} while ((v = cmpxchg(&b->flags, old, new)) != old);

	if (new & (1U << BTREE_NODE_write_in_flight)) {
		/* Flags were already transitioned above, hence already_started. */
		__bch2_btree_node_write(c, b, true);
	} else {
		/*
		 * write_in_flight was cleared without starting a new write:
		 * wake waiters on the bit, exactly as the fast path does,
		 * otherwise they can sleep forever.
		 */
		wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
	}

	six_unlock_read(&b->c.lock);
}
static void bch2_btree_node_write_error(struct bch_fs *c,
......@@ -1729,7 +1767,7 @@ static void btree_write_submit(struct work_struct *work)
bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree, &wbio->key);
}
void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_started)
{
struct btree_write_bio *wbio;
struct bset_tree *t;
......@@ -1746,7 +1784,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
bool validate_before_checksum = false;
void *data;
BUG_ON(btree_node_write_in_flight(b));
if (already_started)
goto do_write;
if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
return;
......@@ -1770,14 +1809,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
if (old & (1 << BTREE_NODE_never_write))
return;
if (old & (1 << BTREE_NODE_write_in_flight)) {
/*
* XXX waiting on btree writes with btree locks held -
* this can deadlock, and we hit the write error path
*/
bch2_btree_node_wait_on_write(b);
continue;
}
BUG_ON(old & (1 << BTREE_NODE_write_in_flight));
new &= ~(1 << BTREE_NODE_dirty);
new &= ~(1 << BTREE_NODE_need_write);
......@@ -1786,6 +1818,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
new ^= (1 << BTREE_NODE_write_idx);
} while (cmpxchg_acquire(&b->flags, old, new) != old);
if (new & (1U << BTREE_NODE_need_write))
return;
do_write:
atomic_dec(&c->btree_cache.dirty);
BUG_ON(btree_node_fake(b));
......@@ -2041,7 +2076,7 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
if (lock_type_held == SIX_LOCK_intent ||
(lock_type_held == SIX_LOCK_read &&
six_lock_tryupgrade(&b->c.lock))) {
__bch2_btree_node_write(c, b);
__bch2_btree_node_write(c, b, false);
/* don't cycle lock unnecessarily: */
if (btree_node_just_written(b) &&
......@@ -2053,7 +2088,7 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
if (lock_type_held == SIX_LOCK_read)
six_lock_downgrade(&b->c.lock);
} else {
__bch2_btree_node_write(c, b);
__bch2_btree_node_write(c, b, false);
if (lock_type_held == SIX_LOCK_write &&
btree_node_just_written(b))
bch2_btree_post_write_cleanup(c, b);
......
......@@ -139,7 +139,7 @@ void bch2_btree_complete_write(struct bch_fs *, struct btree *,
struct btree_write *);
void bch2_btree_write_error_work(struct work_struct *);
void __bch2_btree_node_write(struct bch_fs *, struct btree *);
void __bch2_btree_node_write(struct bch_fs *, struct btree *, bool);
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
void bch2_btree_node_write(struct bch_fs *, struct btree *,
......@@ -148,18 +148,11 @@ void bch2_btree_node_write(struct bch_fs *, struct btree *,
/*
 * Write out btree node @b if it needs a write and is currently allowed to
 * write.
 *
 * @c:         filesystem the node belongs to
 * @b:         btree node to consider
 * @lock_held: six lock type the caller holds on @b; passed through to
 *             bch2_btree_node_write() and used to drop/retake the lock in the
 *             wait loop below
 *
 * NOTE(review): this listing is a diff rendered without +/- markers, so the
 * while loop and the trailing if may be the old and the new body of this
 * function shown together - confirm against the tree.
 */
static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
enum six_lock_type lock_held)
{
/* Loop while b->written is nonzero and the node both needs and may write. */
while (b->written &&
btree_node_need_write(b) &&
btree_node_may_write(b)) {
/* No write in flight: issue the write and stop looping. */
if (!btree_node_write_in_flight(b)) {
bch2_btree_node_write(c, b, lock_held);
break;
}
/*
 * A write is in flight: drop the node lock, wait on the write, retake the
 * lock, then re-evaluate the loop condition.
 */
six_unlock_type(&b->c.lock, lock_held);
bch2_btree_node_wait_on_write(b);
btree_node_lock_type(c, b, lock_held);
}
/*
 * Non-waiting form: only issue a write when none is in flight; otherwise do
 * nothing here.
 */
if (b->written &&
btree_node_need_write(b) &&
btree_node_may_write(b) &&
!btree_node_write_in_flight(b))
bch2_btree_node_write(c, b, lock_held);
}
#define bch2_btree_node_write_cond(_c, _b, cond) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment