Commit 59cc38b8, authored and committed by Kent Overstreet

bcachefs: New discard implementation

In the old allocator code, buckets would be discarded just prior to
being used - this made sense in bcache where we were discarding buckets
just after invalidating the cached data they contain, but in a
filesystem where we typically have more free space we want to be
discarding buckets when they become empty.

This patch implements the new behaviour - it checks the need_discard
btree for buckets awaiting discards, and then clears the appropriate
bit in the alloc btree, which moves the buckets to the freespace btree.

Additionally, discards are now enabled by default.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent f25d8215
...@@ -545,6 +545,7 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, ...@@ -545,6 +545,7 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now)); new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true); SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
} }
if (old_a.data_type && !new_a->data_type && if (old_a.data_type && !new_a->data_type &&
...@@ -579,6 +580,144 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, ...@@ -579,6 +580,144 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
return 0; return 0;
} }
/*
 * Handle one bucket that has an entry in the need_discard btree.
 *
 * The alloc key at @pos is looked up (through the key cache) and converted
 * to a mutable alloc_v4 key, then one of two things happens:
 *
 *  - NEED_INC_GEN is set: the bucket generation is bumped and that flag is
 *    cleared; NEED_DISCARD is left untouched on this pass.
 *
 *  - otherwise: the bucket is discarded on @ca (only when the device has
 *    discards enabled and the filesystem is not in nochanges mode) and
 *    NEED_DISCARD is cleared.
 *
 * In both cases the modified key is queued with bch2_trans_update().
 *
 * @discard_done is owned by the caller and records that the discard has
 * already been issued, so it is not issued a second time if this function
 * is called again with the same flag.
 *
 * Returns 0 on success, -EIO if the alloc key unexpectedly lacks
 * NEED_DISCARD, -EINTR if the transaction could not be relocked after the
 * discard, or whatever error the lookup/conversion/update helpers report.
 */
static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos,
				   struct bch_dev *ca, bool *discard_done)
{
	struct bch_fs *c = trans->c;
	struct printbuf errbuf = PRINTBUF;
	struct btree_iter alloc_iter;
	struct bkey_i_alloc_v4 *alloc;
	struct bkey_s_c k;
	int ret;

	bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, pos,
			     BTREE_ITER_CACHED);

	k = bch2_btree_iter_peek_slot(&alloc_iter);
	ret = bkey_err(k);
	if (ret)
		goto out;

	alloc = bch2_alloc_to_v4_mut(trans, k);
	ret = PTR_ERR_OR_ZERO(alloc);
	if (ret)
		goto out;

	if (BCH_ALLOC_V4_NEED_INC_GEN(&alloc->v)) {
		/* Generation bump only; the discard itself is not done here. */
		alloc->v.gen++;
		SET_BCH_ALLOC_V4_NEED_INC_GEN(&alloc->v, false);
	} else {
		BUG_ON(alloc->v.journal_seq > c->journal.flushed_seq_ondisk);

		/* An entry in need_discard without the flag set is corruption. */
		if (bch2_fs_inconsistent_on(!BCH_ALLOC_V4_NEED_DISCARD(&alloc->v), c,
					    "%s\n incorrectly set in need_discard btree",
					    (bch2_bkey_val_to_text(&errbuf, c, k), errbuf.buf))) {
			ret = -EIO;
			goto out;
		}

		if (!*discard_done && ca->mi.discard && !c->opts.nochanges) {
			/*
			 * Btree locks are dropped around the block layer
			 * call.  No other lock is needed because this is the
			 * only thread that removes items from the
			 * need_discard tree.
			 */
			bch2_trans_unlock(trans);
			blkdev_issue_discard(ca->disk_sb.bdev,
					     k.k->p.offset * ca->mi.bucket_size,
					     ca->mi.bucket_size,
					     GFP_KERNEL);
			*discard_done = true;

			if (!bch2_trans_relock(trans)) {
				ret = -EINTR;
				goto out;
			}
		}

		SET_BCH_ALLOC_V4_NEED_DISCARD(&alloc->v, false);
	}

	ret = bch2_trans_update(trans, &alloc_iter, &alloc->k_i, 0);
out:
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&errbuf);
	return ret;
}
/*
 * Work function behind c->discard_work: one pass over the need_discard
 * btree.
 *
 * For every key found, an io_ref is held on the owning device (keys are
 * skipped up to the next device index when the ref cannot be taken).  A
 * bucket is then either
 *  - counted as open and skipped,
 *  - counted as still waiting on a journal commit and skipped, or
 *  - handed to bch2_clear_need_discard() inside its own transaction commit.
 * The pass stops at the first error from that commit.
 *
 * If more than half of the buckets seen were waiting on a journal commit,
 * an asynchronous journal flush is requested.  The c->writes reference
 * that bch2_do_discards() took before queueing this work is dropped here,
 * and the counters plus the final return code are reported through the
 * do_discards tracepoint.
 */
static void bch2_do_discards_work(struct work_struct *work)
{
	struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
	struct bch_dev *ca = NULL;
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	u64 nr_seen = 0;
	u64 nr_open = 0;
	u64 nr_need_journal_commit = 0;
	u64 nr_discarded = 0;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);

	for_each_btree_key(&trans, iter, BTREE_ID_need_discard,
			   POS_MIN, 0, k, ret) {
		bool discard_done = false;

		/* Crossed into another device's keys: release the old ref. */
		if (ca && ca->dev_idx != k.k->p.inode) {
			percpu_ref_put(&ca->io_ref);
			ca = NULL;
		}

		if (!ca) {
			ca = bch_dev_bkey_exists(c, k.k->p.inode);
			if (!percpu_ref_tryget(&ca->io_ref)) {
				/* No io_ref available: jump past this device. */
				ca = NULL;
				bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
				continue;
			}
		}

		nr_seen++;

		if (bch2_bucket_is_open_safe(c, k.k->p.inode, k.k->p.offset)) {
			nr_open++;
		} else if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
							    c->journal.flushed_seq_ondisk,
							    k.k->p.inode, k.k->p.offset)) {
			nr_need_journal_commit++;
		} else {
			ret = __bch2_trans_do(&trans, NULL, NULL,
					      BTREE_INSERT_USE_RESERVE|
					      BTREE_INSERT_NOFAIL,
					      bch2_clear_need_discard(&trans, k.k->p, ca, &discard_done));
			if (ret)
				break;

			nr_discarded++;
		}
	}
	bch2_trans_iter_exit(&trans, &iter);

	if (ca)
		percpu_ref_put(&ca->io_ref);

	bch2_trans_exit(&trans);

	/* Mostly blocked on the journal: ask for a flush. */
	if (nr_need_journal_commit * 2 > nr_seen)
		bch2_journal_flush_async(&c->journal, NULL);

	percpu_ref_put(&c->writes);

	trace_do_discards(c, nr_seen, nr_open, nr_need_journal_commit,
			  nr_discarded, ret);
}
/*
 * Queue the discard worker on system_long_wq.
 *
 * A reference on c->writes is taken first; if it cannot be obtained
 * nothing is queued.  When queue_work() returns false the reference is
 * dropped again right away; otherwise the queued work function is
 * responsible for releasing it.
 */
void bch2_do_discards(struct bch_fs *c)
{
	if (!percpu_ref_tryget(&c->writes))
		return;

	if (queue_work(system_long_wq, &c->discard_work))
		return;

	percpu_ref_put(&c->writes);
}
static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca) static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
{ {
struct btree_trans trans; struct btree_trans trans;
...@@ -862,4 +1001,5 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca) ...@@ -862,4 +1001,5 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
void bch2_fs_allocator_background_init(struct bch_fs *c) void bch2_fs_allocator_background_init(struct bch_fs *c)
{ {
spin_lock_init(&c->freelist_lock); spin_lock_init(&c->freelist_lock);
INIT_WORK(&c->discard_work, bch2_do_discards_work);
} }
...@@ -113,6 +113,8 @@ int bch2_alloc_read(struct bch_fs *, bool, bool); ...@@ -113,6 +113,8 @@ int bch2_alloc_read(struct bch_fs *, bool, bool);
int bch2_trans_mark_alloc(struct btree_trans *, struct bkey_s_c, int bch2_trans_mark_alloc(struct btree_trans *, struct bkey_s_c,
struct bkey_i *, unsigned); struct bkey_i *, unsigned);
void bch2_do_discards(struct bch_fs *);
int bch2_fs_freespace_init(struct bch_fs *); int bch2_fs_freespace_init(struct bch_fs *);
void bch2_recalc_capacity(struct bch_fs *); void bch2_recalc_capacity(struct bch_fs *);
......
...@@ -758,6 +758,7 @@ struct bch_fs { ...@@ -758,6 +758,7 @@ struct bch_fs {
unsigned write_points_nr; unsigned write_points_nr;
struct buckets_waiting_for_journal buckets_waiting_for_journal; struct buckets_waiting_for_journal buckets_waiting_for_journal;
struct work_struct discard_work;
/* GARBAGE COLLECTION */ /* GARBAGE COLLECTION */
struct task_struct *gc_thread; struct task_struct *gc_thread;
......
...@@ -543,6 +543,11 @@ int bch2_mark_alloc(struct btree_trans *trans, ...@@ -543,6 +543,11 @@ int bch2_mark_alloc(struct btree_trans *trans,
(!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk)) (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
closure_wake_up(&c->freelist_wait); closure_wake_up(&c->freelist_wait);
if ((flags & BTREE_TRIGGER_INSERT) &&
BCH_ALLOC_V4_NEED_DISCARD(&new_a) &&
!new_a.journal_seq)
bch2_do_discards(c);
if (bucket_state(new_a) == BUCKET_need_gc_gens) { if (bucket_state(new_a) == BUCKET_need_gc_gens) {
atomic_inc(&c->kick_gc); atomic_inc(&c->kick_gc);
wake_up_process(c->gc_thread); wake_up_process(c->gc_thread);
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h" #include "alloc_foreground.h"
#include "btree_io.h" #include "btree_io.h"
#include "btree_update_interior.h" #include "btree_update_interior.h"
...@@ -1399,6 +1400,7 @@ static void journal_write_done(struct closure *cl) ...@@ -1399,6 +1400,7 @@ static void journal_write_done(struct closure *cl)
j->flushed_seq_ondisk = seq; j->flushed_seq_ondisk = seq;
j->last_seq_ondisk = w->last_seq; j->last_seq_ondisk = w->last_seq;
bch2_do_discards(c);
closure_wake_up(&c->freelist_wait); closure_wake_up(&c->freelist_wait);
bch2_reset_alloc_cursors(c); bch2_reset_alloc_cursors(c);
......
...@@ -266,7 +266,7 @@ enum opt_type { ...@@ -266,7 +266,7 @@ enum opt_type {
x(discard, u8, \ x(discard, u8, \
OPT_FS|OPT_MOUNT|OPT_DEVICE, \ OPT_FS|OPT_MOUNT|OPT_DEVICE, \
OPT_BOOL(), \ OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \ BCH2_NO_SB_OPT, true, \
NULL, "Enable discard/TRIM support") \ NULL, "Enable discard/TRIM support") \
x(verbose, u8, \ x(verbose, u8, \
OPT_FS|OPT_MOUNT, \ OPT_FS|OPT_MOUNT, \
......
...@@ -401,6 +401,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) ...@@ -401,6 +401,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
bch2_dev_allocator_add(c, ca); bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c); bch2_recalc_capacity(c);
bch2_do_discards(c);
if (!early) { if (!early) {
ret = bch2_fs_read_write_late(c); ret = bch2_fs_read_write_late(c);
if (ret) if (ret)
......
...@@ -182,6 +182,40 @@ TRACE_EVENT(journal_reclaim_finish, ...@@ -182,6 +182,40 @@ TRACE_EVENT(journal_reclaim_finish,
__entry->nr_flushed) __entry->nr_flushed)
); );
/* allocator: */
/*
 * do_discards: summary of one pass of the discard worker.
 *
 * Records the filesystem's dev_t plus the per-pass counters handed in by
 * the caller (keys seen, buckets skipped because they were open, buckets
 * skipped because they still need a journal commit, buckets processed) and
 * the final return code.
 *
 * The device is printed as "major,minor"; without the separator the two
 * numbers run together and cannot be told apart (8/16 and 81/6 would both
 * print as "816").
 */
TRACE_EVENT(do_discards,
	TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
		 u64 need_journal_commit, u64 discarded, int ret),
	TP_ARGS(c, seen, open, need_journal_commit, discarded, ret),

	TP_STRUCT__entry(
		__field(dev_t,		dev			)
		__field(u64,		seen			)
		__field(u64,		open			)
		__field(u64,		need_journal_commit	)
		__field(u64,		discarded		)
		__field(int,		ret			)
	),

	TP_fast_assign(
		__entry->dev			= c->dev;
		__entry->seen			= seen;
		__entry->open			= open;
		__entry->need_journal_commit	= need_journal_commit;
		__entry->discarded		= discarded;
		__entry->ret			= ret;
	),

	TP_printk("%d,%d seen %llu open %llu need_journal_commit %llu discarded %llu ret %i",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->seen,
		  __entry->open,
		  __entry->need_journal_commit,
		  __entry->discarded,
		  __entry->ret)
);
/* bset.c: */ /* bset.c: */
DEFINE_EVENT(bpos, bkey_pack_pos_fail, DEFINE_EVENT(bpos, bkey_pack_pos_fail,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment