Commit de611ab6 authored by Kent Overstreet's avatar Kent Overstreet

bcachefs: Fix race between trans_put() and btree_transactions_read()

debug.c was using closure_get() on a different thread's closure where
we don't know if the object being refcounted is alive.

We keep btree_trans objects on a list so they can be printed by debug
code, and because it is cost-prohibitive to touch the btree_trans list
every time we allocate and free btree_trans objects, cached objects are
also on this list.

However, we do not want the debug code to see cached but not in use
btree_trans objects - critically because the btree_paths array will have
been freed (if it was reallocated).

closure_get() is also incorrect to use when that get may race with it
hitting zero, i.e. we must already have a ref on the object or know the
ref can't currently hit 0 for other reasons (as used in the cycle
detector).

To fix this, use the previously introduced closure_get_not_zero(),
closure_return_sync(), and closure_init_stack_release(); the debug code
now can only take a ref on a trans object if it's alive and in use.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 06efa5f3
...@@ -3130,7 +3130,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) ...@@ -3130,7 +3130,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS); trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
memset(trans, 0, sizeof(*trans)); memset(trans, 0, sizeof(*trans));
closure_init_stack(&trans->ref);
seqmutex_lock(&c->btree_trans_lock); seqmutex_lock(&c->btree_trans_lock);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
...@@ -3161,7 +3160,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) ...@@ -3161,7 +3160,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
list_add_done: list_add_done:
seqmutex_unlock(&c->btree_trans_lock); seqmutex_unlock(&c->btree_trans_lock);
got_trans: got_trans:
trans->ref.closure_get_happened = false;
trans->c = c; trans->c = c;
trans->last_begin_time = local_clock(); trans->last_begin_time = local_clock();
trans->fn_idx = fn_idx; trans->fn_idx = fn_idx;
...@@ -3200,6 +3198,8 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) ...@@ -3200,6 +3198,8 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_lock_time = jiffies; trans->srcu_lock_time = jiffies;
trans->srcu_held = true; trans->srcu_held = true;
closure_init_stack_release(&trans->ref);
return trans; return trans;
} }
...@@ -3257,10 +3257,10 @@ void bch2_trans_put(struct btree_trans *trans) ...@@ -3257,10 +3257,10 @@ void bch2_trans_put(struct btree_trans *trans)
bch2_journal_keys_put(c); bch2_journal_keys_put(c);
/* /*
* trans->ref protects trans->locking_wait.task, btree_paths arary; used * trans->ref protects trans->locking_wait.task, btree_paths array; used
* by cycle detector * by cycle detector
*/ */
closure_sync(&trans->ref); closure_return_sync(&trans->ref);
trans->locking_wait.task = NULL; trans->locking_wait.task = NULL;
unsigned long *paths_allocated = trans->paths_allocated; unsigned long *paths_allocated = trans->paths_allocated;
...@@ -3385,8 +3385,6 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c) ...@@ -3385,8 +3385,6 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
per_cpu_ptr(c->btree_trans_bufs, cpu)->trans; per_cpu_ptr(c->btree_trans_bufs, cpu)->trans;
if (trans) { if (trans) {
closure_sync(&trans->ref);
seqmutex_lock(&c->btree_trans_lock); seqmutex_lock(&c->btree_trans_lock);
list_del(&trans->list); list_del(&trans->list);
seqmutex_unlock(&c->btree_trans_lock); seqmutex_unlock(&c->btree_trans_lock);
......
...@@ -587,14 +587,10 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, ...@@ -587,14 +587,10 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
if (!task || task->pid <= i->iter) if (!task || task->pid <= i->iter)
continue; continue;
closure_get(&trans->ref); if (!closure_get_not_zero(&trans->ref))
u32 seq = seqmutex_unlock(&c->btree_trans_lock); continue;
ret = flush_buf(i); u32 seq = seqmutex_unlock(&c->btree_trans_lock);
if (ret) {
closure_put(&trans->ref);
goto unlocked;
}
bch2_btree_trans_to_text(&i->buf, trans); bch2_btree_trans_to_text(&i->buf, trans);
...@@ -604,10 +600,12 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, ...@@ -604,10 +600,12 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
printbuf_indent_sub(&i->buf, 2); printbuf_indent_sub(&i->buf, 2);
prt_newline(&i->buf); prt_newline(&i->buf);
i->iter = task->pid;
closure_put(&trans->ref); closure_put(&trans->ref);
ret = flush_buf(i);
if (ret)
goto unlocked;
if (!seqmutex_relock(&c->btree_trans_lock, seq)) if (!seqmutex_relock(&c->btree_trans_lock, seq))
goto restart; goto restart;
} }
...@@ -817,7 +815,8 @@ static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c) ...@@ -817,7 +815,8 @@ static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
iter = task->pid; iter = task->pid;
closure_get(&trans->ref); if (!closure_get_not_zero(&trans->ref))
continue;
u32 seq = seqmutex_unlock(&c->btree_trans_lock); u32 seq = seqmutex_unlock(&c->btree_trans_lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment