Commit 0a700890 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Kick off btree node writes from write completions

This is a performance improvement: it removes the need to wait for the
in-flight btree write to complete before kicking off the next one. This
will be needed to avoid a performance regression with the upcoming patch
to update btree ptrs after every btree write.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent 2680325b
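The mechanism, in outline: when a btree node write completes, the completion handler itself checks whether another write became necessary while this one was in flight and, if so, claims and issues it immediately instead of leaving callers to wait on the in-flight write. Below is a minimal userspace sketch of that flag-transition pattern. It is not bcachefs code: struct node, the NODE_* bits, write_done() and start_write() are made-up names, and C11 atomics stand in for the kernel's cmpxchg().

/*
 * Minimal userspace sketch (not bcachefs code): a write completion either
 * clears the "in flight" bit, or, if another write became needed while this
 * one was in flight, atomically claims the next write and starts it itself.
 */
#include <stdatomic.h>
#include <stdio.h>

#define NODE_DIRTY           (1u << 0)
#define NODE_NEED_WRITE      (1u << 1)
#define NODE_WRITE_IN_FLIGHT (1u << 2)

struct node {
	_Atomic unsigned flags;
};

static void start_write(struct node *n);	/* issues the (fake) I/O */

/* Runs when a write completes. */
static void write_done(struct node *n)
{
	unsigned old = atomic_load(&n->flags), new;

	do {
		new = old;
		if (old & NODE_NEED_WRITE) {
			/* claim the next write ourselves */
			new &= ~(NODE_DIRTY | NODE_NEED_WRITE);
			new |= NODE_WRITE_IN_FLIGHT;
		} else {
			new &= ~NODE_WRITE_IN_FLIGHT;
		}
	} while (!atomic_compare_exchange_weak(&n->flags, &old, new));

	if (new & NODE_WRITE_IN_FLIGHT)
		start_write(n);		/* kick off the next write immediately */
}

static void start_write(struct node *n)
{
	/* a real implementation would submit I/O and call write_done() on completion */
	printf("write issued, flags=%#x\n", atomic_load(&n->flags));
}

int main(void)
{
	struct node n = { .flags = NODE_WRITE_IN_FLIGHT | NODE_DIRTY | NODE_NEED_WRITE };

	write_done(&n);		/* need_write was set: the next write starts here */
	write_done(&n);		/* nothing pending: the in-flight bit is just cleared */
	return 0;
}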
@@ -234,7 +234,7 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
 		if (bch2_verify_btree_ondisk)
 			bch2_btree_node_write(c, b, SIX_LOCK_intent);
 		else
-			__bch2_btree_node_write(c, b);
+			__bch2_btree_node_write(c, b, false);
 
 		six_unlock_write(&b->c.lock);
 		six_unlock_intent(&b->c.lock);
@@ -1009,7 +1009,7 @@ void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k)
 	six_lock_write(&b->c.lock, NULL, NULL);
 
 	if (btree_node_dirty(b)) {
-		__bch2_btree_node_write(c, b);
+		__bch2_btree_node_write(c, b, false);
 		six_unlock_write(&b->c.lock);
 		six_unlock_intent(&b->c.lock);
 		goto wait_on_io;
@@ -1562,9 +1562,47 @@ void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
 static void btree_node_write_done(struct bch_fs *c, struct btree *b)
 {
 	struct btree_write *w = btree_prev_write(b);
+	unsigned long old, new, v;
 
 	bch2_btree_complete_write(c, b, w);
-	bch2_btree_node_io_unlock(b);
+
+	v = READ_ONCE(b->flags);
+	do {
+		old = new = v;
+
+		if (old & (1U << BTREE_NODE_need_write))
+			goto do_write;
+
+		new &= ~(1U << BTREE_NODE_write_in_flight);
+	} while ((v = cmpxchg(&b->flags, old, new)) != old);
+
+	wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
+	return;
+
+do_write:
+	six_lock_read(&b->c.lock, NULL, NULL);
+	v = READ_ONCE(b->flags);
+	do {
+		old = new = v;
+
+		if ((old & (1U << BTREE_NODE_dirty)) &&
+		    (old & (1U << BTREE_NODE_need_write)) &&
+		    !(old & (1U << BTREE_NODE_never_write)) &&
+		    btree_node_may_write(b)) {
+			new &= ~(1U << BTREE_NODE_dirty);
+			new &= ~(1U << BTREE_NODE_need_write);
+			new |=  (1U << BTREE_NODE_write_in_flight);
+			new |=  (1U << BTREE_NODE_just_written);
+			new ^=  (1U << BTREE_NODE_write_idx);
+		} else {
+			new &= ~(1U << BTREE_NODE_write_in_flight);
+		}
+	} while ((v = cmpxchg(&b->flags, old, new)) != old);
+
+	if (new & (1U << BTREE_NODE_write_in_flight))
+		__bch2_btree_node_write(c, b, true);
+
+	six_unlock_read(&b->c.lock);
 }
 
 static void bch2_btree_node_write_error(struct bch_fs *c,
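When the completion path finds no further write pending, it clears BTREE_NODE_write_in_flight and wakes anyone sleeping on that bit via wake_up_bit(). Below is a userspace analogue of that wait/wake pairing, assuming a pthreads mutex and condition variable in place of the kernel's wait-on-bit machinery; struct node and the helper names are illustrative only.

/*
 * Userspace analogue (illustrative, not kernel code) of the wake_up_bit()
 * pairing used above: when the completion path does not start another
 * write, it clears the in-flight bit and wakes anyone waiting for it.
 */
#include <pthread.h>
#include <stdio.h>

#define NODE_WRITE_IN_FLIGHT (1u << 2)

struct node {
	unsigned flags;
	pthread_mutex_t lock;
	pthread_cond_t cond;
};

/* Sleep until no write is in flight (cf. bch2_btree_node_wait_on_write()). */
static void node_wait_on_write(struct node *n)
{
	pthread_mutex_lock(&n->lock);
	while (n->flags & NODE_WRITE_IN_FLIGHT)
		pthread_cond_wait(&n->cond, &n->lock);
	pthread_mutex_unlock(&n->lock);
}

/* Completion path when no further write is needed. */
static void node_write_done(struct node *n)
{
	pthread_mutex_lock(&n->lock);
	n->flags &= ~NODE_WRITE_IN_FLIGHT;
	pthread_cond_broadcast(&n->cond);	/* cf. wake_up_bit() */
	pthread_mutex_unlock(&n->lock);
}

static void *waiter(void *arg)
{
	node_wait_on_write(arg);
	printf("write completed, waiter released\n");
	return NULL;
}

int main(void)
{
	struct node n = {
		.flags = NODE_WRITE_IN_FLIGHT,
		.lock  = PTHREAD_MUTEX_INITIALIZER,
		.cond  = PTHREAD_COND_INITIALIZER,
	};
	pthread_t t;

	pthread_create(&t, NULL, waiter, &n);
	node_write_done(&n);	/* clear the bit and wake the waiter */
	pthread_join(t, NULL);
	return 0;
}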
@@ -1729,7 +1767,7 @@ static void btree_write_submit(struct work_struct *work)
 	bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree, &wbio->key);
 }
 
-void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
+void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_started)
 {
 	struct btree_write_bio *wbio;
 	struct bset_tree *t;
@@ -1746,7 +1784,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
 	bool validate_before_checksum = false;
 	void *data;
 
-	BUG_ON(btree_node_write_in_flight(b));
+	if (already_started)
+		goto do_write;
 
 	if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
 		return;
@@ -1770,14 +1809,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
 		if (old & (1 << BTREE_NODE_never_write))
 			return;
 
-		if (old & (1 << BTREE_NODE_write_in_flight)) {
-			/*
-			 * XXX waiting on btree writes with btree locks held -
-			 * this can deadlock, and we hit the write error path
-			 */
-			bch2_btree_node_wait_on_write(b);
-			continue;
-		}
+		BUG_ON(old & (1 << BTREE_NODE_write_in_flight));
 
 		new &= ~(1 << BTREE_NODE_dirty);
 		new &= ~(1 << BTREE_NODE_need_write);
@@ -1786,6 +1818,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
 		new ^=  (1 << BTREE_NODE_write_idx);
 	} while (cmpxchg_acquire(&b->flags, old, new) != old);
 
+	if (new & (1U << BTREE_NODE_need_write))
+		return;
+do_write:
 	atomic_dec(&c->btree_cache.dirty);
 
 	BUG_ON(btree_node_fake(b));
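The new bool parameter gives __bch2_btree_node_write() two entry points: ordinary callers pass false and the function claims the write itself via the flags transition above, while the completion path passes true because btree_node_write_done() has already performed that transition and only the I/O submission remains. A sketch of that calling convention follows, under the same illustrative userspace model as earlier; node_write() and the NODE_* bits are made-up names, not the bcachefs API.

/*
 * Sketch (illustrative names) of the already_started convention: normal
 * callers let the function claim the write; a caller that already did the
 * flag transition skips straight to issuing the I/O.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NODE_DIRTY           (1u << 0)
#define NODE_NEED_WRITE      (1u << 1)
#define NODE_WRITE_IN_FLIGHT (1u << 2)

struct node {
	_Atomic unsigned flags;
};

static void node_write(struct node *n, bool already_started)
{
	unsigned old, new;

	if (already_started)
		goto do_write;		/* caller already claimed the write */

	old = atomic_load(&n->flags);
	do {
		if (!(old & NODE_DIRTY))
			return;		/* nothing to write */

		new = old;
		new &= ~(NODE_DIRTY | NODE_NEED_WRITE);
		new |= NODE_WRITE_IN_FLIGHT;
	} while (!atomic_compare_exchange_weak(&n->flags, &old, new));

do_write:
	printf("issuing write, flags=%#x\n", atomic_load(&n->flags));
}

int main(void)
{
	struct node n = { .flags = NODE_DIRTY | NODE_NEED_WRITE };

	/* normal caller: the function claims the write itself */
	node_write(&n, false);

	/* completion path: the flag transition was already done by its caller */
	node_write(&n, true);
	return 0;
}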
@@ -2041,7 +2076,7 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 	if (lock_type_held == SIX_LOCK_intent ||
 	    (lock_type_held == SIX_LOCK_read &&
 	     six_lock_tryupgrade(&b->c.lock))) {
-		__bch2_btree_node_write(c, b);
+		__bch2_btree_node_write(c, b, false);
 
 		/* don't cycle lock unnecessarily: */
 		if (btree_node_just_written(b) &&
@@ -2053,7 +2088,7 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 		if (lock_type_held == SIX_LOCK_read)
 			six_lock_downgrade(&b->c.lock);
 	} else {
-		__bch2_btree_node_write(c, b);
+		__bch2_btree_node_write(c, b, false);
 		if (lock_type_held == SIX_LOCK_write &&
 		    btree_node_just_written(b))
 			bch2_btree_post_write_cleanup(c, b);
@@ -139,7 +139,7 @@ void bch2_btree_complete_write(struct bch_fs *, struct btree *,
 			      struct btree_write *);
 void bch2_btree_write_error_work(struct work_struct *);
 
-void __bch2_btree_node_write(struct bch_fs *, struct btree *);
+void __bch2_btree_node_write(struct bch_fs *, struct btree *, bool);
 bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
 
 void bch2_btree_node_write(struct bch_fs *, struct btree *,
@@ -148,18 +148,11 @@ void bch2_btree_node_write(struct bch_fs *, struct btree *,
 static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
 					    enum six_lock_type lock_held)
 {
-	while (b->written &&
-	       btree_node_need_write(b) &&
-	       btree_node_may_write(b)) {
-		if (!btree_node_write_in_flight(b)) {
-			bch2_btree_node_write(c, b, lock_held);
-			break;
-		}
-
-		six_unlock_type(&b->c.lock, lock_held);
-		bch2_btree_node_wait_on_write(b);
-		btree_node_lock_type(c, b, lock_held);
-	}
+	if (b->written &&
+	    btree_node_need_write(b) &&
+	    btree_node_may_write(b) &&
+	    !btree_node_write_in_flight(b))
+		bch2_btree_node_write(c, b, lock_held);
 }
 
 #define bch2_btree_node_write_cond(_c, _b, cond)			\