Commit ebb84d09, authored and committed by Kent Overstreet

bcachefs: Increase journal pipelining

This patch increases the maximum journal buffers in flight from 2 to 4 -
this will be particularly helpful when in the future we stop requiring
flush+fua for every journal write.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 5db43418
...@@ -23,7 +23,7 @@ static u64 last_unwritten_seq(struct journal *j) ...@@ -23,7 +23,7 @@ static u64 last_unwritten_seq(struct journal *j)
lockdep_assert_held(&j->lock); lockdep_assert_held(&j->lock);
return journal_cur_seq(j) - s.prev_buf_unwritten; return journal_cur_seq(j) - ((s.idx - s.unwritten_idx) & JOURNAL_BUF_MASK);
} }
static inline bool journal_seq_unwritten(struct journal *j, u64 seq) static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
...@@ -51,7 +51,7 @@ journal_seq_to_buf(struct journal *j, u64 seq) ...@@ -51,7 +51,7 @@ journal_seq_to_buf(struct journal *j, u64 seq)
j->reservations.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL); j->reservations.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL);
if (journal_seq_unwritten(j, seq)) { if (journal_seq_unwritten(j, seq)) {
buf = j->buf + (seq & 1); buf = j->buf + (seq & JOURNAL_BUF_MASK);
EBUG_ON(le64_to_cpu(buf->data->seq) != seq); EBUG_ON(le64_to_cpu(buf->data->seq) != seq);
} }
return buf; return buf;
...@@ -108,15 +108,8 @@ void bch2_journal_halt(struct journal *j) ...@@ -108,15 +108,8 @@ void bch2_journal_halt(struct journal *j)
/* journal entry close/open: */ /* journal entry close/open: */
void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set) void __bch2_journal_buf_put(struct journal *j)
{ {
if (!need_write_just_set &&
test_bit(JOURNAL_NEED_WRITE, &j->flags))
bch2_time_stats_update(j->delay_time,
j->need_write_time);
clear_bit(JOURNAL_NEED_WRITE, &j->flags);
closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL); closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
} }
...@@ -129,7 +122,6 @@ static bool __journal_entry_close(struct journal *j) ...@@ -129,7 +122,6 @@ static bool __journal_entry_close(struct journal *j)
struct journal_buf *buf = journal_cur_buf(j); struct journal_buf *buf = journal_cur_buf(j);
union journal_res_state old, new; union journal_res_state old, new;
u64 v = atomic64_read(&j->reservations.counter); u64 v = atomic64_read(&j->reservations.counter);
bool set_need_write = false;
unsigned sectors; unsigned sectors;
lockdep_assert_held(&j->lock); lockdep_assert_held(&j->lock);
...@@ -148,15 +140,13 @@ static bool __journal_entry_close(struct journal *j) ...@@ -148,15 +140,13 @@ static bool __journal_entry_close(struct journal *j)
if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) { if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) {
set_bit(JOURNAL_NEED_WRITE, &j->flags); set_bit(JOURNAL_NEED_WRITE, &j->flags);
j->need_write_time = local_clock(); j->need_write_time = local_clock();
set_need_write = true;
} }
if (new.prev_buf_unwritten)
return false;
new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL; new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL;
new.idx++; new.idx++;
new.prev_buf_unwritten = 1;
if (new.idx == new.unwritten_idx)
return false;
BUG_ON(journal_state_count(new, new.idx)); BUG_ON(journal_state_count(new, new.idx));
} while ((v = atomic64_cmpxchg(&j->reservations.counter, } while ((v = atomic64_cmpxchg(&j->reservations.counter,
...@@ -190,24 +180,44 @@ static bool __journal_entry_close(struct journal *j) ...@@ -190,24 +180,44 @@ static bool __journal_entry_close(struct journal *j)
*/ */
buf->data->last_seq = cpu_to_le64(journal_last_seq(j)); buf->data->last_seq = cpu_to_le64(journal_last_seq(j));
__bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq));
journal_pin_new_entry(j, 1); journal_pin_new_entry(j, 1);
bch2_journal_buf_init(j); bch2_journal_buf_init(j);
cancel_delayed_work(&j->write_work); cancel_delayed_work(&j->write_work);
clear_bit(JOURNAL_NEED_WRITE, &j->flags);
bch2_journal_space_available(j); bch2_journal_space_available(j);
bch2_journal_buf_put(j, old.idx, set_need_write); bch2_journal_buf_put(j, old.idx);
return true; return true;
} }
static bool journal_entry_want_write(struct journal *j)
{
union journal_res_state s = READ_ONCE(j->reservations);
bool ret = false;
/*
* Don't close it yet if we already have a write in flight, but do set
* NEED_WRITE:
*/
if (s.idx != s.unwritten_idx)
set_bit(JOURNAL_NEED_WRITE, &j->flags);
else
ret = __journal_entry_close(j);
return ret;
}
static bool journal_entry_close(struct journal *j) static bool journal_entry_close(struct journal *j)
{ {
bool ret; bool ret;
spin_lock(&j->lock); spin_lock(&j->lock);
ret = __journal_entry_close(j); ret = journal_entry_want_write(j);
spin_unlock(&j->lock); spin_unlock(&j->lock);
return ret; return ret;
...@@ -289,8 +299,8 @@ static int journal_entry_open(struct journal *j) ...@@ -289,8 +299,8 @@ static int journal_entry_open(struct journal *j)
static bool journal_quiesced(struct journal *j) static bool journal_quiesced(struct journal *j)
{ {
union journal_res_state state = READ_ONCE(j->reservations); union journal_res_state s = READ_ONCE(j->reservations);
bool ret = !state.prev_buf_unwritten && !__journal_entry_is_open(state); bool ret = s.idx == s.unwritten_idx && !__journal_entry_is_open(s);
if (!ret) if (!ret)
journal_entry_close(j); journal_entry_close(j);
...@@ -317,17 +327,29 @@ static void journal_write_work(struct work_struct *work) ...@@ -317,17 +327,29 @@ static void journal_write_work(struct work_struct *work)
u64 bch2_inode_journal_seq(struct journal *j, u64 inode) u64 bch2_inode_journal_seq(struct journal *j, u64 inode)
{ {
size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8)); size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8));
u64 seq = 0; union journal_res_state s;
unsigned i;
u64 seq;
if (!test_bit(h, j->buf[0].has_inode) &&
!test_bit(h, j->buf[1].has_inode))
return 0;
spin_lock(&j->lock); spin_lock(&j->lock);
if (test_bit(h, journal_cur_buf(j)->has_inode)) seq = journal_cur_seq(j);
seq = journal_cur_seq(j); s = READ_ONCE(j->reservations);
else if (test_bit(h, journal_prev_buf(j)->has_inode)) i = s.idx;
seq = journal_cur_seq(j) - 1;
while (1) {
if (test_bit(h, j->buf[i].has_inode))
goto out;
if (i == s.unwritten_idx)
break;
i = (i - 1) & JOURNAL_BUF_MASK;
seq--;
}
seq = 0;
out:
spin_unlock(&j->lock); spin_unlock(&j->lock);
return seq; return seq;
...@@ -574,7 +596,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq, ...@@ -574,7 +596,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
BUG(); BUG();
if (seq == journal_cur_seq(j)) if (seq == journal_cur_seq(j))
__journal_entry_close(j); journal_entry_want_write(j);
out: out:
spin_unlock(&j->lock); spin_unlock(&j->lock);
return ret; return ret;
...@@ -863,15 +885,18 @@ int bch2_dev_journal_alloc(struct bch_dev *ca) ...@@ -863,15 +885,18 @@ int bch2_dev_journal_alloc(struct bch_dev *ca)
static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx) static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx)
{ {
union journal_res_state state; union journal_res_state state;
struct journal_buf *w; bool ret = false;
bool ret; unsigned i;
spin_lock(&j->lock); spin_lock(&j->lock);
state = READ_ONCE(j->reservations); state = READ_ONCE(j->reservations);
w = j->buf + !state.idx; i = state.idx;
ret = state.prev_buf_unwritten && while (i != state.unwritten_idx) {
bch2_bkey_has_device(bkey_i_to_s_c(&w->key), dev_idx); i = (i - 1) & JOURNAL_BUF_MASK;
if (bch2_bkey_has_device(bkey_i_to_s_c(&j->buf[i].key), dev_idx))
ret = true;
}
spin_unlock(&j->lock); spin_unlock(&j->lock);
return ret; return ret;
...@@ -957,7 +982,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq, ...@@ -957,7 +982,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
journal_pin_new_entry(j, 1); journal_pin_new_entry(j, 1);
j->reservations.idx = journal_cur_seq(j); j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j);
bch2_journal_buf_init(j); bch2_journal_buf_init(j);
...@@ -1015,8 +1040,10 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) ...@@ -1015,8 +1040,10 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
void bch2_fs_journal_exit(struct journal *j) void bch2_fs_journal_exit(struct journal *j)
{ {
kvpfree(j->buf[1].data, j->buf[1].buf_size); unsigned i;
kvpfree(j->buf[0].data, j->buf[0].buf_size);
for (i = 0; i < ARRAY_SIZE(j->buf); i++)
kvpfree(j->buf[i].data, j->buf[i].buf_size);
free_fifo(&j->pin); free_fifo(&j->pin);
} }
...@@ -1024,6 +1051,7 @@ int bch2_fs_journal_init(struct journal *j) ...@@ -1024,6 +1051,7 @@ int bch2_fs_journal_init(struct journal *j)
{ {
struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_fs *c = container_of(j, struct bch_fs, journal);
static struct lock_class_key res_key; static struct lock_class_key res_key;
unsigned i;
int ret = 0; int ret = 0;
pr_verbose_init(c->opts, ""); pr_verbose_init(c->opts, "");
...@@ -1038,8 +1066,6 @@ int bch2_fs_journal_init(struct journal *j) ...@@ -1038,8 +1066,6 @@ int bch2_fs_journal_init(struct journal *j)
lockdep_init_map(&j->res_map, "journal res", &res_key, 0); lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
j->buf[0].buf_size = JOURNAL_ENTRY_SIZE_MIN;
j->buf[1].buf_size = JOURNAL_ENTRY_SIZE_MIN;
j->write_delay_ms = 1000; j->write_delay_ms = 1000;
j->reclaim_delay_ms = 100; j->reclaim_delay_ms = 100;
...@@ -1051,13 +1077,20 @@ int bch2_fs_journal_init(struct journal *j) ...@@ -1051,13 +1077,20 @@ int bch2_fs_journal_init(struct journal *j)
((union journal_res_state) ((union journal_res_state)
{ .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) || if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL))) {
!(j->buf[0].data = kvpmalloc(j->buf[0].buf_size, GFP_KERNEL)) ||
!(j->buf[1].data = kvpmalloc(j->buf[1].buf_size, GFP_KERNEL))) {
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
} }
for (i = 0; i < ARRAY_SIZE(j->buf); i++) {
j->buf[i].buf_size = JOURNAL_ENTRY_SIZE_MIN;
j->buf[i].data = kvpmalloc(j->buf[i].buf_size, GFP_KERNEL);
if (!j->buf[i].data) {
ret = -ENOMEM;
goto out;
}
}
j->pin.front = j->pin.back = 1; j->pin.front = j->pin.back = 1;
out: out:
pr_verbose_init(c->opts, "ret %i", ret); pr_verbose_init(c->opts, "ret %i", ret);
...@@ -1071,7 +1104,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) ...@@ -1071,7 +1104,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_fs *c = container_of(j, struct bch_fs, journal);
union journal_res_state s; union journal_res_state s;
struct bch_dev *ca; struct bch_dev *ca;
unsigned iter; unsigned i;
rcu_read_lock(); rcu_read_lock();
spin_lock(&j->lock); spin_lock(&j->lock);
...@@ -1114,16 +1147,16 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) ...@@ -1114,16 +1147,16 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
} }
pr_buf(out, pr_buf(out,
"current entry refs:\t%u\n" "current entry:\tidx %u refcount %u\n",
"prev entry unwritten:\t", s.idx, journal_state_count(s, s.idx));
journal_state_count(s, s.idx));
i = s.idx;
if (s.prev_buf_unwritten) while (i != s.unwritten_idx) {
pr_buf(out, "yes, ref %u sectors %u\n", i = (i - 1) & JOURNAL_BUF_MASK;
journal_state_count(s, !s.idx),
journal_prev_buf(j)->sectors); pr_buf(out, "unwritten entry:\tidx %u refcount %u sectors %u\n",
else i, journal_state_count(s, i), j->buf[i].sectors);
pr_buf(out, "no\n"); }
pr_buf(out, pr_buf(out,
"need write:\t\t%i\n" "need write:\t\t%i\n"
...@@ -1131,7 +1164,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) ...@@ -1131,7 +1164,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
test_bit(JOURNAL_NEED_WRITE, &j->flags), test_bit(JOURNAL_NEED_WRITE, &j->flags),
test_bit(JOURNAL_REPLAY_DONE, &j->flags)); test_bit(JOURNAL_REPLAY_DONE, &j->flags));
for_each_member_device_rcu(ca, c, iter, for_each_member_device_rcu(ca, c, i,
&c->rw_devs[BCH_DATA_journal]) { &c->rw_devs[BCH_DATA_journal]) {
struct journal_device *ja = &ca->journal; struct journal_device *ja = &ca->journal;
...@@ -1146,7 +1179,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) ...@@ -1146,7 +1179,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
"\tdirty_idx_ondisk\t%u (seq %llu)\n" "\tdirty_idx_ondisk\t%u (seq %llu)\n"
"\tdirty_idx\t\t%u (seq %llu)\n" "\tdirty_idx\t\t%u (seq %llu)\n"
"\tcur_idx\t\t%u (seq %llu)\n", "\tcur_idx\t\t%u (seq %llu)\n",
iter, ja->nr, i, ja->nr,
bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), bch2_journal_dev_buckets_available(j, ja, journal_space_discarded),
ja->sectors_free, ja->sectors_free,
ja->discard_idx, ja->discard_idx,
......
...@@ -127,11 +127,6 @@ static inline struct journal_buf *journal_cur_buf(struct journal *j) ...@@ -127,11 +127,6 @@ static inline struct journal_buf *journal_cur_buf(struct journal *j)
return j->buf + j->reservations.idx; return j->buf + j->reservations.idx;
} }
static inline struct journal_buf *journal_prev_buf(struct journal *j)
{
return j->buf + !j->reservations.idx;
}
/* Sequence number of oldest dirty journal entry */ /* Sequence number of oldest dirty journal entry */
static inline u64 journal_last_seq(struct journal *j) static inline u64 journal_last_seq(struct journal *j)
...@@ -151,13 +146,21 @@ void bch2_journal_set_has_inum(struct journal *, u64, u64); ...@@ -151,13 +146,21 @@ void bch2_journal_set_has_inum(struct journal *, u64, u64);
static inline int journal_state_count(union journal_res_state s, int idx) static inline int journal_state_count(union journal_res_state s, int idx)
{ {
return idx == 0 ? s.buf0_count : s.buf1_count; switch (idx) {
case 0: return s.buf0_count;
case 1: return s.buf1_count;
case 2: return s.buf2_count;
case 3: return s.buf3_count;
}
BUG();
} }
static inline void journal_state_inc(union journal_res_state *s) static inline void journal_state_inc(union journal_res_state *s)
{ {
s->buf0_count += s->idx == 0; s->buf0_count += s->idx == 0;
s->buf1_count += s->idx == 1; s->buf1_count += s->idx == 1;
s->buf2_count += s->idx == 2;
s->buf3_count += s->idx == 3;
} }
static inline void bch2_journal_set_has_inode(struct journal *j, static inline void bch2_journal_set_has_inode(struct journal *j,
...@@ -257,21 +260,24 @@ static inline bool journal_entry_empty(struct jset *j) ...@@ -257,21 +260,24 @@ static inline bool journal_entry_empty(struct jset *j)
return true; return true;
} }
void __bch2_journal_buf_put(struct journal *, bool); void __bch2_journal_buf_put(struct journal *);
static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, static inline void bch2_journal_buf_put(struct journal *j, unsigned idx)
bool need_write_just_set)
{ {
union journal_res_state s; union journal_res_state s;
s.v = atomic64_sub_return(((union journal_res_state) { s.v = atomic64_sub_return(((union journal_res_state) {
.buf0_count = idx == 0, .buf0_count = idx == 0,
.buf1_count = idx == 1, .buf1_count = idx == 1,
.buf2_count = idx == 2,
.buf3_count = idx == 3,
}).v, &j->reservations.counter); }).v, &j->reservations.counter);
if (!journal_state_count(s, idx)) {
EBUG_ON(s.idx == idx || !s.prev_buf_unwritten); EBUG_ON(((s.idx - idx) & 3) >
__bch2_journal_buf_put(j, need_write_just_set); ((s.idx - s.unwritten_idx) & 3));
}
if (!journal_state_count(s, idx) && idx == s.unwritten_idx)
__bch2_journal_buf_put(j);
} }
/* /*
...@@ -291,7 +297,7 @@ static inline void bch2_journal_res_put(struct journal *j, ...@@ -291,7 +297,7 @@ static inline void bch2_journal_res_put(struct journal *j,
BCH_JSET_ENTRY_btree_keys, BCH_JSET_ENTRY_btree_keys,
0, 0, NULL, 0); 0, 0, NULL, 0);
bch2_journal_buf_put(j, res->idx, false); bch2_journal_buf_put(j, res->idx);
res->ref = 0; res->ref = 0;
} }
...@@ -327,11 +333,18 @@ static inline int journal_res_get_fast(struct journal *j, ...@@ -327,11 +333,18 @@ static inline int journal_res_get_fast(struct journal *j,
!test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags))
return 0; return 0;
if (flags & JOURNAL_RES_GET_CHECK)
return 1;
new.cur_entry_offset += res->u64s; new.cur_entry_offset += res->u64s;
journal_state_inc(&new); journal_state_inc(&new);
/*
* If the refcount would overflow, we have to wait:
* XXX - tracepoint this:
*/
if (!journal_state_count(new, new.idx))
return 0;
if (flags & JOURNAL_RES_GET_CHECK)
return 1;
} while ((v = atomic64_cmpxchg(&j->reservations.counter, } while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v); old.v, new.v)) != old.v);
......
...@@ -950,16 +950,23 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) ...@@ -950,16 +950,23 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
buf->buf_size = new_size; buf->buf_size = new_size;
} }
/*
 * Return the journal buffer slot selected by reservations.unwritten_idx,
 * i.e. the oldest buffer whose write has not yet been marked complete.
 */
static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)
{
	return &j->buf[j->reservations.unwritten_idx];
}
static void journal_write_done(struct closure *cl) static void journal_write_done(struct closure *cl)
{ {
struct journal *j = container_of(cl, struct journal, io); struct journal *j = container_of(cl, struct journal, io);
struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_buf *w = journal_prev_buf(j); struct journal_buf *w = journal_last_unwritten_buf(j);
struct bch_devs_list devs = struct bch_devs_list devs =
bch2_bkey_devs(bkey_i_to_s_c(&w->key)); bch2_bkey_devs(bkey_i_to_s_c(&w->key));
struct bch_replicas_padded replicas; struct bch_replicas_padded replicas;
union journal_res_state old, new;
u64 seq = le64_to_cpu(w->data->seq); u64 seq = le64_to_cpu(w->data->seq);
u64 last_seq = le64_to_cpu(w->data->last_seq); u64 last_seq = le64_to_cpu(w->data->last_seq);
u64 v;
int err = 0; int err = 0;
bch2_time_stats_update(j->write_time, j->write_start_time); bch2_time_stats_update(j->write_time, j->write_start_time);
...@@ -998,9 +1005,14 @@ static void journal_write_done(struct closure *cl) ...@@ -998,9 +1005,14 @@ static void journal_write_done(struct closure *cl)
/* also must come before signalling write completion: */ /* also must come before signalling write completion: */
closure_debug_destroy(cl); closure_debug_destroy(cl);
BUG_ON(!j->reservations.prev_buf_unwritten); v = atomic64_read(&j->reservations.counter);
atomic64_sub(((union journal_res_state) { .prev_buf_unwritten = 1 }).v, do {
&j->reservations.counter); old.v = new.v = v;
BUG_ON(new.idx == new.unwritten_idx);
new.unwritten_idx++;
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v);
closure_wake_up(&w->wait); closure_wake_up(&w->wait);
journal_wake(j); journal_wake(j);
...@@ -1008,6 +1020,10 @@ static void journal_write_done(struct closure *cl) ...@@ -1008,6 +1020,10 @@ static void journal_write_done(struct closure *cl)
if (test_bit(JOURNAL_NEED_WRITE, &j->flags)) if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
mod_delayed_work(system_freezable_wq, &j->write_work, 0); mod_delayed_work(system_freezable_wq, &j->write_work, 0);
spin_unlock(&j->lock); spin_unlock(&j->lock);
if (new.unwritten_idx != new.idx &&
!journal_state_count(new, new.unwritten_idx))
closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
} }
static void journal_write_endio(struct bio *bio) static void journal_write_endio(struct bio *bio)
...@@ -1018,7 +1034,7 @@ static void journal_write_endio(struct bio *bio) ...@@ -1018,7 +1034,7 @@ static void journal_write_endio(struct bio *bio)
if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write error: %s", if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write error: %s",
bch2_blk_status_to_str(bio->bi_status)) || bch2_blk_status_to_str(bio->bi_status)) ||
bch2_meta_write_fault("journal")) { bch2_meta_write_fault("journal")) {
struct journal_buf *w = journal_prev_buf(j); struct journal_buf *w = journal_last_unwritten_buf(j);
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&j->err_lock, flags); spin_lock_irqsave(&j->err_lock, flags);
...@@ -1035,7 +1051,7 @@ void bch2_journal_write(struct closure *cl) ...@@ -1035,7 +1051,7 @@ void bch2_journal_write(struct closure *cl)
struct journal *j = container_of(cl, struct journal, io); struct journal *j = container_of(cl, struct journal, io);
struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca; struct bch_dev *ca;
struct journal_buf *w = journal_prev_buf(j); struct journal_buf *w = journal_last_unwritten_buf(j);
struct jset_entry *start, *end; struct jset_entry *start, *end;
struct jset *jset; struct jset *jset;
struct bio *bio; struct bio *bio;
...@@ -1046,8 +1062,6 @@ void bch2_journal_write(struct closure *cl) ...@@ -1046,8 +1062,6 @@ void bch2_journal_write(struct closure *cl)
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
bch2_journal_pin_put(j, le64_to_cpu(w->data->seq));
journal_buf_realloc(j, w); journal_buf_realloc(j, w);
jset = w->data; jset = w->data;
......
...@@ -58,6 +58,19 @@ static void journal_set_remaining(struct journal *j, unsigned u64s_remaining) ...@@ -58,6 +58,19 @@ static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
old.v, new.v)) != old.v); old.v, new.v)) != old.v);
} }
/*
 * Find the next unwritten journal buffer that has a nonzero sector count.
 *
 * Scanning starts at buffer *idx and stops before j->reservations.idx (the
 * currently open buffer). Each buffer examined advances *idx by one, modulo
 * the number of journal buffers, so repeated calls continue where the last
 * one left off.
 *
 * Returns the sector count of the first buffer found with sectors != 0, or 0
 * once *idx has caught up with j->reservations.idx.
 */
static inline unsigned get_unwritten_sectors(struct journal *j, unsigned *idx)
{
	for (;;) {
		unsigned pos = *idx;
		unsigned nr;

		/* Reached the open entry: nothing further is unwritten. */
		if (pos == j->reservations.idx)
			return 0;

		nr = j->buf[pos].sectors;
		*idx = (pos + 1) & JOURNAL_BUF_MASK;

		if (nr)
			return nr;
	}
}
static struct journal_space { static struct journal_space {
unsigned next_entry; unsigned next_entry;
unsigned remaining; unsigned remaining;
...@@ -69,15 +82,14 @@ static struct journal_space { ...@@ -69,15 +82,14 @@ static struct journal_space {
unsigned sectors_next_entry = UINT_MAX; unsigned sectors_next_entry = UINT_MAX;
unsigned sectors_total = UINT_MAX; unsigned sectors_total = UINT_MAX;
unsigned i, nr_devs = 0; unsigned i, nr_devs = 0;
unsigned unwritten_sectors = j->reservations.prev_buf_unwritten unsigned unwritten_sectors;
? journal_prev_buf(j)->sectors
: 0;
rcu_read_lock(); rcu_read_lock();
for_each_member_device_rcu(ca, c, i, for_each_member_device_rcu(ca, c, i,
&c->rw_devs[BCH_DATA_journal]) { &c->rw_devs[BCH_DATA_journal]) {
struct journal_device *ja = &ca->journal; struct journal_device *ja = &ca->journal;
unsigned buckets_this_device, sectors_this_device; unsigned buckets_this_device, sectors_this_device;
unsigned idx = j->reservations.unwritten_idx;
if (!ja->nr) if (!ja->nr)
continue; continue;
...@@ -89,16 +101,20 @@ static struct journal_space { ...@@ -89,16 +101,20 @@ static struct journal_space {
* Note that we don't allocate the space for a journal entry * Note that we don't allocate the space for a journal entry
* until we write it out - thus, account for it here: * until we write it out - thus, account for it here:
*/ */
if (unwritten_sectors >= sectors_this_device) { while ((unwritten_sectors = get_unwritten_sectors(j, &idx))) {
if (!buckets_this_device) if (unwritten_sectors >= sectors_this_device) {
continue; if (!buckets_this_device) {
sectors_this_device = 0;
buckets_this_device--; break;
sectors_this_device = ca->mi.bucket_size; }
buckets_this_device--;
sectors_this_device = ca->mi.bucket_size;
}
sectors_this_device -= unwritten_sectors;
} }
sectors_this_device -= unwritten_sectors;
if (sectors_this_device < ca->mi.bucket_size && if (sectors_this_device < ca->mi.bucket_size &&
buckets_this_device) { buckets_this_device) {
buckets_this_device--; buckets_this_device--;
...@@ -277,6 +293,14 @@ static void bch2_journal_reclaim_fast(struct journal *j) ...@@ -277,6 +293,14 @@ static void bch2_journal_reclaim_fast(struct journal *j)
bch2_journal_space_available(j); bch2_journal_space_available(j);
} }
/*
 * Drop one reference on the pin list for journal sequence number @seq.
 *
 * When the count reaches zero, bch2_journal_reclaim_fast() is called.
 * The caller visible in this file, __journal_entry_close(), invokes this
 * with j->lock already held.
 */
void __bch2_journal_pin_put(struct journal *j, u64 seq)
{
	/* Other references remain: nothing to reclaim yet. */
	if (!atomic_dec_and_test(&journal_seq_pin(j, seq)->count))
		return;

	bch2_journal_reclaim_fast(j);
}
void bch2_journal_pin_put(struct journal *j, u64 seq) void bch2_journal_pin_put(struct journal *j, u64 seq)
{ {
struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq); struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
......
...@@ -39,6 +39,7 @@ journal_seq_pin(struct journal *j, u64 seq) ...@@ -39,6 +39,7 @@ journal_seq_pin(struct journal *j, u64 seq)
return &j->pin.data[seq & j->pin.mask]; return &j->pin.data[seq & j->pin.mask];
} }
void __bch2_journal_pin_put(struct journal *, u64);
void bch2_journal_pin_put(struct journal *, u64); void bch2_journal_pin_put(struct journal *, u64);
void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *); void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *);
......
...@@ -11,13 +11,13 @@ ...@@ -11,13 +11,13 @@
struct journal_res; struct journal_res;
#define JOURNAL_BUF_BITS 1 #define JOURNAL_BUF_BITS 2
#define JOURNAL_BUF_NR (1U << JOURNAL_BUF_BITS) #define JOURNAL_BUF_NR (1U << JOURNAL_BUF_BITS)
#define JOURNAL_BUF_MASK (JOURNAL_BUF_NR - 1) #define JOURNAL_BUF_MASK (JOURNAL_BUF_NR - 1)
/* /*
* We put two of these in struct journal; we used them for writes to the * We put JOURNAL_BUF_NR of these in struct journal; we used them for writes to
* journal that are being staged or in flight. * the journal that are being staged or in flight.
*/ */
struct journal_buf { struct journal_buf {
struct jset *data; struct jset *data;
...@@ -85,10 +85,12 @@ union journal_res_state { ...@@ -85,10 +85,12 @@ union journal_res_state {
struct { struct {
u64 cur_entry_offset:20, u64 cur_entry_offset:20,
idx:1, idx:2,
prev_buf_unwritten:1, unwritten_idx:2,
buf0_count:21, buf0_count:10,
buf1_count:21; buf1_count:10,
buf2_count:10,
buf3_count:10;
}; };
}; };
...@@ -169,7 +171,7 @@ struct journal { ...@@ -169,7 +171,7 @@ struct journal {
* Two journal entries -- one is currently open for new entries, the * Two journal entries -- one is currently open for new entries, the
* other is possibly being written out. * other is possibly being written out.
*/ */
struct journal_buf buf[2]; struct journal_buf buf[JOURNAL_BUF_NR];
spinlock_t lock; spinlock_t lock;
......
...@@ -1048,13 +1048,13 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -1048,13 +1048,13 @@ int bch2_fs_recovery(struct bch_fs *c)
if (!c->sb.clean) { if (!c->sb.clean) {
ret = bch2_journal_seq_blacklist_add(c, ret = bch2_journal_seq_blacklist_add(c,
journal_seq, journal_seq,
journal_seq + 4); journal_seq + 8);
if (ret) { if (ret) {
bch_err(c, "error creating new journal seq blacklist entry"); bch_err(c, "error creating new journal seq blacklist entry");
goto err; goto err;
} }
journal_seq += 4; journal_seq += 8;
/* /*
* The superblock needs to be written before we do any btree * The superblock needs to be written before we do any btree
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment