Commit b3f5620f authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'bcachefs-2024-08-08' of git://evilpiepirate.org/bcachefs

Pull bcachefs fixes from Kent Overstreet:
 "Assorted little stuff:

   - lockdep fixup for lockdep_set_notrack_class()

   - we can now remove a device when using erasure coding without
     deadlocking, though we still hit other issues

   - the 'allocator stuck' timeout is now configurable, and messages are
     ratelimited. The default timeout has been increased from 10 seconds
     to 30"

* tag 'bcachefs-2024-08-08' of git://evilpiepirate.org/bcachefs:
  bcachefs: Use bch2_wait_on_allocator() in btree node alloc path
  bcachefs: Make allocator stuck timeout configurable, ratelimit messages
  bcachefs: Add missing path_traverse() to btree_iter_next_node()
  bcachefs: ec should not allocate from ro devs
  bcachefs: Improved allocator debugging for ec
  bcachefs: Add missing bch2_trans_begin() call
  bcachefs: Add a comment for bucket helper types
  bcachefs: Don't rely on implicit unsigned -> signed integer conversion
  lockdep: Fix lockdep_set_notrack_class() for CONFIG_LOCK_STAT
  bcachefs: Fix double free of ca->buckets_nouse
parents cb5b81bc 73dc1656
......@@ -82,6 +82,14 @@ static inline bool bucket_data_type_mismatch(enum bch_data_type bucket,
bucket_data_type(bucket) != bucket_data_type(ptr);
}
/*
* It is my general preference to use unsigned types for unsigned quantities -
* however, these helpers are used in disk accounting calculations run by
* triggers where the output will be negated and added to an s64. unsigned is
* right out even though all these quantities will fit in 32 bits, since it
* won't be sign extended correctly; u64 will negate "correctly", but s64 is the
* simpler option here.
*/
static inline s64 bch2_bucket_sectors_total(struct bch_alloc_v4 a)
{
return a.stripe_sectors + a.dirty_sectors + a.cached_sectors;
......@@ -166,7 +174,7 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
* avoid overflowing LRU_TIME_BITS on a corrupted fs, when
* bucket_sectors_dirty is (much) bigger than bucket_size
*/
u64 d = min(bch2_bucket_sectors_dirty(a),
u64 d = min_t(s64, bch2_bucket_sectors_dirty(a),
ca->mi.bucket_size);
return div_u64(d * (1ULL << 31), ca->mi.bucket_size);
......
......@@ -1603,7 +1603,8 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope
prt_newline(out);
}
void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c,
struct bch_dev *ca)
{
struct open_bucket *ob;
......@@ -1613,7 +1614,8 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
ob++) {
spin_lock(&ob->lock);
if (ob->valid && !ob->on_partial_list)
if (ob->valid && !ob->on_partial_list &&
(!ca || ob->dev == ca->dev_idx))
bch2_open_bucket_to_text(out, c, ob);
spin_unlock(&ob->lock);
}
......@@ -1756,11 +1758,12 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
prt_printf(out, "buckets to invalidate\t%llu\r\n", should_invalidate_buckets(ca, stats));
}
void bch2_print_allocator_stuck(struct bch_fs *c)
static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
{
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "Allocator stuck? Waited for 10 seconds\n");
prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n",
c->opts.allocator_stuck_timeout);
prt_printf(&buf, "Allocator debug:\n");
printbuf_indent_add(&buf, 2);
......@@ -1790,3 +1793,24 @@ void bch2_print_allocator_stuck(struct bch_fs *c)
bch2_print_string_as_lines(KERN_ERR, buf.buf);
printbuf_exit(&buf);
}
static inline unsigned allocator_wait_timeout(struct bch_fs *c)
{
if (c->allocator_last_stuck &&
time_after(c->allocator_last_stuck + HZ * 60 * 2, jiffies))
return 0;
return c->opts.allocator_stuck_timeout * HZ;
}
void __bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
{
unsigned t = allocator_wait_timeout(c);
if (t && closure_sync_timeout(cl, t)) {
c->allocator_last_stuck = jiffies;
bch2_print_allocator_stuck(c);
}
closure_sync(cl);
}
......@@ -223,7 +223,7 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp
void bch2_fs_allocator_foreground_init(struct bch_fs *);
void bch2_open_bucket_to_text(struct printbuf *, struct bch_fs *, struct open_bucket *);
void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *);
void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *, struct bch_dev *);
void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *);
void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
......@@ -231,6 +231,11 @@ void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
void bch2_fs_alloc_debug_to_text(struct printbuf *, struct bch_fs *);
void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *);
void bch2_print_allocator_stuck(struct bch_fs *);
void __bch2_wait_on_allocator(struct bch_fs *, struct closure *);
static inline void bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
{
if (cl->closure_get_happened)
__bch2_wait_on_allocator(c, cl);
}
#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
......@@ -893,6 +893,8 @@ struct bch_fs {
struct bch_fs_usage_base __percpu *usage;
u64 __percpu *online_reserved;
unsigned long allocator_last_stuck;
struct io_clock io_clock[2];
/* JOURNAL SEQ BLACKLIST */
......
......@@ -836,6 +836,8 @@ LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI,
LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
struct bch_sb, flags[5], 0, 16);
LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT,
struct bch_sb, flags[5], 16, 32);
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{
......
......@@ -1921,6 +1921,11 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
bch2_trans_verify_not_in_restart(trans);
bch2_btree_iter_verify(iter);
ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (ret)
goto err;
struct btree_path *path = btree_iter_path(trans, iter);
/* already at end? */
......
......@@ -1264,7 +1264,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl);
bch2_trans_unlock(trans);
closure_sync(&cl);
bch2_wait_on_allocator(c, &cl);
} while (bch2_err_matches(ret, BCH_ERR_operation_blocked));
}
......
......@@ -1809,6 +1809,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity);
BUG_ON(v->nr_redundant != h->s->nr_parity);
/* * We bypass the sector allocator which normally does this: */
bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) {
__clear_bit(v->ptrs[i].dev, devs.d);
if (i < h->s->nr_data)
......@@ -2235,6 +2238,23 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
mutex_unlock(&c->ec_stripes_heap_lock);
}
static void bch2_new_stripe_to_text(struct printbuf *out, struct bch_fs *c,
struct ec_stripe_new *s)
{
prt_printf(out, "\tidx %llu blocks %u+%u allocated %u ref %u %u %s obs",
s->idx, s->nr_data, s->nr_parity,
bitmap_weight(s->blocks_allocated, s->nr_data),
atomic_read(&s->ref[STRIPE_REF_io]),
atomic_read(&s->ref[STRIPE_REF_stripe]),
bch2_watermarks[s->h->watermark]);
struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
unsigned i;
for_each_set_bit(i, s->blocks_gotten, v->nr_blocks)
prt_printf(out, " %u", s->blocks[i]);
prt_newline(out);
}
void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
{
struct ec_stripe_head *h;
......@@ -2247,23 +2267,15 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
bch2_watermarks[h->watermark]);
if (h->s)
prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n",
h->s->idx, h->s->nr_data, h->s->nr_parity,
bitmap_weight(h->s->blocks_allocated,
h->s->nr_data));
bch2_new_stripe_to_text(out, c, h->s);
}
mutex_unlock(&c->ec_stripe_head_lock);
prt_printf(out, "in flight:\n");
mutex_lock(&c->ec_stripe_new_lock);
list_for_each_entry(s, &c->ec_stripe_new_list, list) {
prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n",
s->idx, s->nr_data, s->nr_parity,
atomic_read(&s->ref[STRIPE_REF_io]),
atomic_read(&s->ref[STRIPE_REF_stripe]),
bch2_watermarks[s->h->watermark]);
}
list_for_each_entry(s, &c->ec_stripe_new_list, list)
bch2_new_stripe_to_text(out, c, s);
mutex_unlock(&c->ec_stripe_new_lock);
}
......
......@@ -126,11 +126,7 @@ int bch2_extent_fallocate(struct btree_trans *trans,
if (closure_nr_remaining(&cl) != 1) {
bch2_trans_unlock_long(trans);
if (closure_sync_timeout(&cl, HZ * 10)) {
bch2_print_allocator_stuck(c);
closure_sync(&cl);
}
bch2_wait_on_allocator(c, &cl);
}
return ret;
......
......@@ -406,6 +406,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
bch2_trans_iter_init(trans, &iter, rbio->data_btree,
rbio->read_pos, BTREE_ITER_slots);
retry:
bch2_trans_begin(trans);
rbio->bio.bi_status = 0;
k = bch2_btree_iter_peek_slot(&iter);
......
......@@ -1503,10 +1503,7 @@ static void __bch2_write(struct bch_write_op *op)
if ((op->flags & BCH_WRITE_SYNC) ||
(!(op->flags & BCH_WRITE_SUBMITTED) &&
!(op->flags & BCH_WRITE_IN_WORKER))) {
if (closure_sync_timeout(&op->cl, HZ * 10)) {
bch2_print_allocator_stuck(c);
closure_sync(&op->cl);
}
bch2_wait_on_allocator(c, &op->cl);
__bch2_write_index(op);
......
......@@ -391,6 +391,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \
NULL, "Log transaction function names in journal") \
x(allocator_stuck_timeout, u16, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_UINT(0, U16_MAX), \
BCH_SB_ALLOCATOR_STUCK_TIMEOUT, 30, \
NULL, "Default timeout in seconds for stuck allocator messages")\
x(noexcl, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
......
......@@ -414,6 +414,10 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb,
if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb))
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version));
if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 &&
!BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb))
SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30);
}
for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {
......
......@@ -1193,7 +1193,6 @@ static void bch2_dev_free(struct bch_dev *ca)
if (ca->kobj.state_in_sysfs)
kobject_del(&ca->kobj);
kfree(ca->buckets_nouse);
bch2_free_super(&ca->disk_sb);
bch2_dev_allocator_background_exit(ca);
bch2_dev_journal_exit(ca);
......
......@@ -367,7 +367,7 @@ SHOW(bch2_fs)
bch2_stripes_heap_to_text(out, c);
if (attr == &sysfs_open_buckets)
bch2_open_buckets_to_text(out, c);
bch2_open_buckets_to_text(out, c, NULL);
if (attr == &sysfs_open_buckets_partial)
bch2_open_buckets_partial_to_text(out, c);
......@@ -811,6 +811,9 @@ SHOW(bch2_dev)
if (attr == &sysfs_alloc_debug)
bch2_dev_alloc_debug_to_text(out, ca);
if (attr == &sysfs_open_buckets)
bch2_open_buckets_to_text(out, c, ca);
return 0;
}
......@@ -892,6 +895,7 @@ struct attribute *bch2_dev_files[] = {
/* debug: */
&sysfs_alloc_debug,
&sysfs_open_buckets,
NULL
};
......
......@@ -5936,6 +5936,9 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!depth))
return;
if (unlikely(lock->key == &__lockdep_no_track__))
return;
hlock = find_held_lock(curr, lock, depth, &i);
if (!hlock) {
print_lock_contention_bug(curr, lock, ip);
......@@ -5978,6 +5981,9 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!depth))
return;
if (unlikely(lock->key == &__lockdep_no_track__))
return;
hlock = find_held_lock(curr, lock, depth, &i);
if (!hlock) {
print_lock_contention_bug(curr, lock, _RET_IP_);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment